diff --git a/dict.c b/dict.c index 7ae007e1..6e76bb7f 100644 --- a/dict.c +++ b/dict.c @@ -92,7 +92,7 @@ struct _xmlDictEntry { const xmlChar *name; unsigned int len; int valid; - unsigned long okey; + unsigned okey; }; typedef struct _xmlDictStrings xmlDictStrings; @@ -374,34 +374,28 @@ found_pool: * * Calculate a hash key using a good hash function that works well for * larger hash table sizes. - * - * Hash function by "One-at-a-Time Hash" see - * http://burtleburtle.net/bob/hash/doobs.html */ #ifdef __clang__ ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow") ATTRIBUTE_NO_SANITIZE("unsigned-shift-base") #endif -static uint32_t +static unsigned xmlDictComputeBigKey(const xmlChar* data, int namelen, unsigned seed) { - uint32_t hash; + unsigned h1, h2; int i; if (namelen <= 0 || data == NULL) return(0); - hash = seed; + HASH_INIT(h1, h2, seed); for (i = 0;i < namelen; i++) { - hash += data[i]; - hash += (hash << 10); - hash ^= (hash >> 6); + HASH_UPDATE(h1, h2, data[i]); } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; + HASH_FINISH(h1, h2); + + return h2; } /* @@ -419,34 +413,27 @@ xmlDictComputeBigKey(const xmlChar* data, int namelen, unsigned seed) { ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow") ATTRIBUTE_NO_SANITIZE("unsigned-shift-base") #endif -static unsigned long +static unsigned xmlDictComputeBigQKey(const xmlChar *prefix, int plen, const xmlChar *name, int len, unsigned seed) { - uint32_t hash; + unsigned h1, h2; int i; - hash = seed; + HASH_INIT(h1, h2, seed); - for (i = 0;i < plen; i++) { - hash += prefix[i]; - hash += (hash << 10); - hash ^= (hash >> 6); + for (i = 0; i < plen; i++) { + HASH_UPDATE(h1, h2, prefix[i]); } - hash += ':'; - hash += (hash << 10); - hash ^= (hash >> 6); + HASH_UPDATE(h1, h2, ':'); - for (i = 0;i < len; i++) { - hash += name[i]; - hash += (hash << 10); - hash ^= (hash >> 6); + for (i = 0; i < len; i++) { + HASH_UPDATE(h1, h2, name[i]); } - hash += (hash << 3); - hash ^= (hash >> 
11); - hash += (hash << 15); - return hash; + HASH_FINISH(h1, h2); + + return h2; } #endif /* WITH_BIG_KEY */ @@ -456,9 +443,13 @@ xmlDictComputeBigQKey(const xmlChar *prefix, int plen, * Calculate a hash key using a fast hash function that works well * for low hash table fill. */ -static unsigned long +#ifdef __clang__ +ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow") +ATTRIBUTE_NO_SANITIZE("unsigned-shift-base") +#endif +static unsigned xmlDictComputeFastKey(const xmlChar *name, int namelen, unsigned seed) { - unsigned long value = seed; + unsigned value = seed; if ((name == NULL) || (namelen <= 0)) return(value); @@ -500,11 +491,15 @@ xmlDictComputeFastKey(const xmlChar *name, int namelen, unsigned seed) { * * Neither of the two strings must be NULL. */ -static unsigned long +#ifdef __clang__ +ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow") +ATTRIBUTE_NO_SANITIZE("unsigned-shift-base") +#endif +static unsigned xmlDictComputeFastQKey(const xmlChar *prefix, int plen, const xmlChar *name, int len, unsigned seed) { - unsigned long value = seed; + unsigned value = seed; if (plen == 0) value += 30 * ':'; @@ -669,12 +664,12 @@ xmlDictReference(xmlDictPtr dict) { */ static int xmlDictGrow(xmlDictPtr dict, size_t size) { - unsigned long key, okey; + unsigned key, okey; size_t oldsize, i; xmlDictEntryPtr iter, next; struct _xmlDictEntry *olddict; #ifdef DEBUG_GROW - unsigned long nbElem = 0; + unsigned nbElem = 0; #endif int ret = 0; int keep_keys = 1; @@ -861,7 +856,7 @@ xmlDictFree(xmlDictPtr dict) { */ const xmlChar * xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) { - unsigned long key, okey, nbi = 0; + unsigned key, okey, nbi = 0; xmlDictEntryPtr entry; xmlDictEntryPtr insert; const xmlChar *ret; @@ -914,7 +909,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) { } if (dict->subdict) { - unsigned long skey; + unsigned skey; /* we cannot always reuse the same okey for the subdict */ if (((dict->size == MIN_DICT_SIZE) && @@ -1004,7 
+999,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) { */ const xmlChar * xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) { - unsigned long key, okey; + unsigned key, okey; xmlDictEntryPtr insert; unsigned int l; @@ -1053,7 +1048,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) { } if (dict->subdict) { - unsigned long skey; + unsigned skey; /* we cannot always reuse the same okey for the subdict */ if (((dict->size == MIN_DICT_SIZE) && @@ -1110,7 +1105,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) { */ const xmlChar * xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) { - unsigned long okey, key, nbi = 0; + unsigned okey, key, nbi = 0; xmlDictEntryPtr entry; xmlDictEntryPtr insert; const xmlChar *ret; @@ -1146,7 +1141,7 @@ xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) { } if (dict->subdict) { - unsigned long skey; + unsigned skey; /* we cannot always reuse the same okey for the subdict */ if (((dict->size == MIN_DICT_SIZE) && diff --git a/hash.c b/hash.c index 175a8137..e01069b1 100644 --- a/hash.c +++ b/hash.c @@ -81,88 +81,88 @@ struct _xmlHashTable { ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow") ATTRIBUTE_NO_SANITIZE("unsigned-shift-base") #endif -static unsigned long +static unsigned xmlHashComputeKey(xmlHashTablePtr table, const xmlChar *name, const xmlChar *name2, const xmlChar *name3) { - unsigned long value; - unsigned long ch; + unsigned h1, h2, ch; + + HASH_INIT(h1, h2, table->random_seed); - value = table->random_seed; if (name != NULL) { - value += 30 * (*name); while ((ch = *name++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } } - value = value ^ ((value << 5) + (value >> 3)); + HASH_UPDATE(h1, h2, 0); if (name2 != NULL) { while ((ch = *name2++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } } - value = value ^ ((value << 5) + (value >> 
3)); + HASH_UPDATE(h1, h2, 0); if (name3 != NULL) { while ((ch = *name3++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } } - return (value % table->size); + + HASH_FINISH(h1, h2); + + return (h2 % table->size); } #ifdef __clang__ ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow") ATTRIBUTE_NO_SANITIZE("unsigned-shift-base") #endif -static unsigned long +static unsigned xmlHashComputeQKey(xmlHashTablePtr table, const xmlChar *prefix, const xmlChar *name, const xmlChar *prefix2, const xmlChar *name2, const xmlChar *prefix3, const xmlChar *name3) { - unsigned long value; - unsigned long ch; + unsigned h1, h2, ch; - value = table->random_seed; - if (prefix != NULL) - value += 30 * (*prefix); - else - value += 30 * (*name); + HASH_INIT(h1, h2, table->random_seed); if (prefix != NULL) { while ((ch = *prefix++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } - value = value ^ ((value << 5) + (value >> 3) + ':'); + HASH_UPDATE(h1, h2, ':'); } if (name != NULL) { while ((ch = *name++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } } - value = value ^ ((value << 5) + (value >> 3)); + HASH_UPDATE(h1, h2, 0); if (prefix2 != NULL) { while ((ch = *prefix2++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } - value = value ^ ((value << 5) + (value >> 3) + ':'); + HASH_UPDATE(h1, h2, ':'); } if (name2 != NULL) { while ((ch = *name2++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } } - value = value ^ ((value << 5) + (value >> 3)); + HASH_UPDATE(h1, h2, 0); if (prefix3 != NULL) { while ((ch = *prefix3++) != 0) { - value = value ^ ((value << 5) + (value >> 3) + ch); + HASH_UPDATE(h1, h2, ch); } - value = value ^ ((value << 5) + (value >> 3) + ':'); + HASH_UPDATE(h1, h2, ':'); } if (name3 != NULL) { while ((ch = *name3++) != 0) { - value = value ^ ((value << 5) + (value 
>> 3) + ch); + HASH_UPDATE(h1, h2, ch); } } - return (value % table->size); + + HASH_FINISH(h1, h2); + + return (h2 % table->size); } /** @@ -232,12 +232,12 @@ xmlHashCreateDict(int size, xmlDictPtr dict) { */ static int xmlHashGrow(xmlHashTablePtr table, int size) { - unsigned long key; + unsigned key; int oldsize, i; xmlHashEntryPtr iter, next; struct _xmlHashEntry *oldtable; #ifdef DEBUG_GROW - unsigned long nbElem = 0; + unsigned nbElem = 0; #endif if (table == NULL) @@ -532,7 +532,7 @@ int xmlHashAddEntry3(xmlHashTablePtr table, const xmlChar *name, const xmlChar *name2, const xmlChar *name3, void *userdata) { - unsigned long key, len = 0; + unsigned key, len = 0; xmlHashEntryPtr entry; xmlHashEntryPtr insert; @@ -676,7 +676,7 @@ int xmlHashUpdateEntry3(xmlHashTablePtr table, const xmlChar *name, const xmlChar *name2, const xmlChar *name3, void *userdata, xmlHashDeallocator f) { - unsigned long key; + unsigned key; xmlHashEntryPtr entry; xmlHashEntryPtr insert; @@ -820,7 +820,7 @@ error: void * xmlHashLookup3(xmlHashTablePtr table, const xmlChar *name, const xmlChar *name2, const xmlChar *name3) { - unsigned long key; + unsigned key; xmlHashEntryPtr entry; if (table == NULL) @@ -866,7 +866,7 @@ xmlHashQLookup3(xmlHashTablePtr table, const xmlChar *prefix, const xmlChar *name, const xmlChar *prefix2, const xmlChar *name2, const xmlChar *prefix3, const xmlChar *name3) { - unsigned long key; + unsigned key; xmlHashEntryPtr entry; if (table == NULL) @@ -1142,7 +1142,7 @@ xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name, int xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name, const xmlChar *name2, const xmlChar *name3, xmlHashDeallocator f) { - unsigned long key; + unsigned key; xmlHashEntryPtr entry; xmlHashEntryPtr prev = NULL; diff --git a/include/private/dict.h b/include/private/dict.h index e35cfb23..9dcbc1d7 100644 --- a/include/private/dict.h +++ b/include/private/dict.h @@ -1,7 +1,47 @@ #ifndef XML_DICT_H_PRIVATE__ #define 
XML_DICT_H_PRIVATE__ +/* + * Values are ANDed with 0xFFFFFFFF to support platforms where + * unsigned is larger than 32 bits. With 32-bit unsigned values, + * modern compilers should optimize the operation away. + */ + #define HASH_ROL(x,n) ((x) << (n) | ((x) & 0xFFFFFFFF) >> (32 - (n))) +#define HASH_ROR(x,n) (((x) & 0xFFFFFFFF) >> (n) | (x) << (32 - (n))) + +/* + * GoodOAAT: One of the smallest non-multiplicative One-At-a-Time functions + * that passes SMHasher. + * + * Author: Sokolov Yura aka funny-falcon + */ + +#define HASH_INIT(h1, h2, seed) \ + do { \ + h1 = seed ^ 0x3b00; \ + h2 = HASH_ROL(seed, 15); \ + } while (0) + +#define HASH_UPDATE(h1, h2, ch) \ + do { \ + h1 += ch; \ + h1 += h1 << 3; \ + h2 += h1; \ + h2 = HASH_ROL(h2, 7); \ + h2 += h2 << 2; \ + } while (0) + +/* Result is in h2 */ +#define HASH_FINISH(h1, h2) \ + do { \ + h1 ^= h2; \ + h1 += HASH_ROL(h2, 14); \ + h2 ^= h1; h2 += HASH_ROR(h1, 6); \ + h1 ^= h2; h1 += HASH_ROL(h2, 5); \ + h2 ^= h1; h2 += HASH_ROR(h1, 8); \ + h2 &= 0xFFFFFFFF; \ + } while (0) XML_HIDDEN void xmlInitDictInternal(void); diff --git a/testdict.c b/testdict.c index f731a98d..9f058489 100644 --- a/testdict.c +++ b/testdict.c @@ -22,9 +22,9 @@ static const char *seeds2[] = { NULL }; -#define NB_STRINGS_MAX 10000 -#define NB_STRINGS_NS 1000 -#define NB_STRINGS_PREFIX 50 +#define NB_STRINGS_MAX 100000 +#define NB_STRINGS_NS 10000 +#define NB_STRINGS_PREFIX (NB_STRINGS_NS / 20) #define NB_STRINGS_MIN 10 static xmlChar **strings1;