diff options
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | include/libast.h | 27 | ||||
-rw-r--r-- | src/builtin_hashes.c | 193 | ||||
-rw-r--r-- | test/Makefile.am | 2 | ||||
-rw-r--r-- | test/perf.c | 285 | ||||
-rw-r--r-- | test/test.c | 315 |
6 files changed, 660 insertions, 175 deletions
@@ -553,3 +553,16 @@ Wed Jan 21 18:19:49 2004 Michael Jennings (mej) Adding hash functions in preparation for a hash table implementation. ---------------------------------------------------------------------- +Thu Jan 22 20:42:40 2004 Michael Jennings (mej) + +Added a few new hashes. + +Fixed a typo in the old hashes. + +LibAST-ized the hash tests and made new performance tests. + +Anybody care to comment on the validity of my performance tests? All +6 algorithms end up with pretty much the same results. Do I need to +use a dictionary instead of random "words?" That's kinda what I'm +thinking.... +---------------------------------------------------------------------- diff --git a/include/libast.h b/include/libast.h index 989ddd2..70497be 100644 --- a/include/libast.h +++ b/include/libast.h @@ -2590,7 +2590,7 @@ extern spifopt_settings_t spifopt_settings; * @return Number of hash buckets required. * */ -#define JENKINS_HASH_SIZE(n) (SPIF_CAST(uint32) (1UL << (n))) +#define SPIFHASH_SIZE(n) (SPIF_CAST(uint32) (1UL << (n))) /** * Calculate mask to apply to hash key to get lowest n bits. @@ -2603,7 +2603,7 @@ extern spifopt_settings_t spifopt_settings; * @return Bitmask to zero out all but the n lowest bits. * */ -#define JENKINS_HASH_MASK(n) (SPIF_CAST(uint32) (JENKINS_HASH_SIZE(n) - 1)) +#define SPIFHASH_MASK(n) (SPIF_CAST(uint32) (SPIFHASH_SIZE(n) - 1)) /** * Mix 3 32-bit integer values in a reproducible, reversible manner. @@ -2621,7 +2621,7 @@ extern spifopt_settings_t spifopt_settings; * @param b A 32-bit integer. * @param c A 32-bit integer. */ -#define JENKINS_MIX(a,b,c) \ +#define SPIFHASH_JENKINS_MIX(a,b,c) \ { \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ @@ -2633,6 +2633,16 @@ extern spifopt_settings_t spifopt_settings; b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \ } + +/** + * A pointer to a hash function. + * + * This type is used to refer to any of the spifhash_* functions. A + * variable of this type can point to any of the built-in hash + * functions in LibAST interchangeably. + * + */ +typedef spif_uint32_t (*spifhash_func_t)(spif_uint8_t *, spif_uint32_t, spif_uint32_t); /*@}*/ @@ -2725,13 +2735,16 @@ extern char *strsep(char **, char *); #endif /* builtin_hashes.c */ -spif_uint32_t jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed); -spif_uint32_t jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, register spif_uint32_t seed); +extern spif_uint32_t spifhash_jenkins(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed); +extern spif_uint32_t spifhash_jenkins32(spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed); #if WORDS_BIGENDIAN -# define jenkins_hashLE(k, l, s) jenkins_hash((k), (l), (s)) +# define spifhash_jenkinsLE(k, l, s) spifhash_jenkins((k), (l), (s)) #else -spif_uint32_t jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed); +extern spif_uint32_t spifhash_jenkinsLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed); #endif +extern spif_uint32_t spifhash_rotating(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed); +extern spif_uint32_t spifhash_one_at_a_time(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed); +extern spif_uint32_t spifhash_fnv(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed); /* conf.c */ extern void spifconf_init_subsystem(void); diff --git a/src/builtin_hashes.c b/src/builtin_hashes.c index ef07ea7..eeb44a1 100644 --- a/src/builtin_hashes.c +++ b/src/builtin_hashes.c @@ -29,6 +29,8 @@ static const char cvs_ident[] = "$Id$"; #include <libast_internal.h> +#define BUILTIN_RANDOM_SEED (SPIF_CAST(uint32) 0xf721b64d) + /* * Bob Jenkins' hash algorithm as published in December 1996. Public * domain. See http://burtleburtle.net/bob/hash/ @@ -43,8 +45,8 @@ static const char cvs_ident[] = "$Id$"; * instructions for an n-byte key. * * For a hash value of w bits, the returned value should be - * bitwise-AND'd with JENKINS_HASH_MASK(w), and the hash table should - * have JENKINS_HASH_SIZE(w) buckets. + * bitwise-AND'd with SPIFHASH_MASK(w), and the hash table should + * have SPIFHASH_SIZE(w) buckets. * * @param key Pointer to bitstream holding key. * @param length Number of bytes in bitstream. @@ -54,12 +56,12 @@ static const char cvs_ident[] = "$Id$"; * */ spif_uint32_t -jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) +spifhash_jenkins(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) { register spif_uint32_t a, b, c, len; len = length; - a = b = 0xf721b64d; /* This can be any 32-bit value. */ + a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */ c = seed; /* The loop below handles most of the key (all but the last @@ -68,7 +70,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24)); b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24)); c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24)); - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); key += 12; len -= 12; } @@ -90,7 +92,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register case 1: a += key[0]; /* case 0: nothing left to add */ } - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); return c; } @@ -100,10 +102,10 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register * * This function hashes a series of 32-bit unsigned integers of a * given length into a 32-bit hash value suitable for use in hash - * tables. This hash is basically identical to jenkins_hash(), except + * tables. This hash is basically identical to spifhash_jenkins(), except * that the key length must be a whole number of 32-bit dword's, and * the length is given in spif_uint32_t's, not bytes. It is much - * faster than jenkins_hash(), so if padding keys to 32 bit chunks is + * faster than spifhash_jenkins(), so if padding keys to 32 bit chunks is * inexpensive, this function is probably preferable. * * @param key Pointer to bitstream holding key. @@ -114,22 +116,23 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register * */ spif_uint32_t -jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, register spif_uint32_t seed) +spifhash_jenkins32(spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) { register spif_uint32_t a, b, c, len; + register spif_uint32_t *key_dword = SPIF_CAST_PTR(uint32) key; len = length; - a = b = 0xf721b64d; /* This can be any 32-bit value. */ + a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */ c = seed; /* The loop below handles most of the key (all but the last length % 3 uint32's). */ while (len >= 3) { - a += key[0]; - b += key[1]; - c += key[2]; - JENKINS_MIX(a, b, c); - key += 3; + a += key_dword[0]; + b += key_dword[1]; + c += key_dword[2]; + SPIFHASH_JENKINS_MIX(a, b, c); + key_dword += 3; len -= 3; } @@ -137,11 +140,11 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis uint32's. All cases drop through to the next case. */ c += length; switch (len) { - case 2: b += key[1]; - case 1: a += key[0]; + case 2: b += key_dword[1]; + case 1: a += key_dword[0]; /* case 0: nothing left to add */ } - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); return c; } @@ -152,9 +155,9 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis * * This function hashes a bitstream of a given length into a 32-bit * hash value suitable for use in hash tables. This hash is basically - * identical to jenkins_hash(), except that it only works on + * identical to spifhash_jenkins(), except that it only works on * little-endian machines (e.g., Intel x86 and VAX). It is faster - * than jenkins_hash(), so it should be preferred on little-endian + * than spifhash_jenkins(), so it should be preferred on little-endian * systems. * * @param key Pointer to bitstream holding key. @@ -165,12 +168,12 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis * */ spif_uint32_t -jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) +spifhash_jenkinsLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) { register spif_uint32_t a, b, c, len; len = length; - a = b = 0xf721b64d; /* This can be any 32-bit value. */ + a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */ c = seed; /* The loop below handles most of the key (all but the last @@ -181,7 +184,7 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24)); b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24)); c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24)); - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); key += 12; len -= 12; } @@ -189,11 +192,11 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist /* 32-bit aligned. Use speedier method. */ while (len >= 12) { /* These three lines are the only ones which differ from - jenkins_hash(). */ - a += *key; + spifhash_jenkins(). */ + a += *(SPIF_CAST_PTR(uint32) key); b += *(SPIF_CAST_PTR(uint32) (key + 4)); c += *(SPIF_CAST_PTR(uint32) (key + 8)); - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); key += 12; len -= 12; } @@ -203,21 +206,133 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist bytes. All cases drop through to the next case. */ c += length; switch (len) { - case 11: c += (SPIF_CAST(uint32) key[10] << 24); - case 10: c += (SPIF_CAST(uint32) key[9] << 16); - case 9: c += (SPIF_CAST(uint32) key[8] << 8); - case 8: b += (SPIF_CAST(uint32) key[7] << 24); - case 7: b += (SPIF_CAST(uint32) key[6] << 16); - case 6: b += (SPIF_CAST(uint32) key[5] << 8); - case 5: b += key[4]; - case 4: a += (SPIF_CAST(uint32) key[3] << 24); - case 3: a += (SPIF_CAST(uint32) key[2] << 16); - case 2: a += (SPIF_CAST(uint32) key[1] << 8); - case 1: a += key[0]; - /* case 0: nothing left to add */ + case 11: + c += (SPIF_CAST(uint32) key[10] << 24); + case 10: + c += (SPIF_CAST(uint32) key[9] << 16); + case 9: + c += (SPIF_CAST(uint32) key[8] << 8); + case 8: + b += (SPIF_CAST(uint32) key[7] << 24); + case 7: + b += (SPIF_CAST(uint32) key[6] << 16); + case 6: + b += (SPIF_CAST(uint32) key[5] << 8); + case 5: + b += key[4]; + case 4: + a += (SPIF_CAST(uint32) key[3] << 24); + case 3: + a += (SPIF_CAST(uint32) key[2] << 16); + case 2: + a += (SPIF_CAST(uint32) key[1] << 8); + case 1: + a += key[0]; + /* case 0: nothing left to add */ } - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); return c; } #endif + + +/** + * The rotating hash. + * + * This is an implementation of a rotating hash algorithm with a + * slight modification so that it can be used with a 32-bit bitmask + * and a power-of-two table size (so as to avoid the expensive + * "hash % prime" step). + * + * This algorithm uses 8n+6 instructions to hash a key of n bytes. + * The mix step is a left bitwise rotation by 4, so systems with a + * rotate instruction will save 2 instructions per loop. + * + * @param key The arbitrary-length key. + * @param len The length of the key, in bytes. + * @param seed An arbitrary seed value. + * @return A 32-bit hash value. + * + */ +spif_uint32_t +spifhash_rotating(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed) +{ + spif_uint32_t hash, i; + + if (!seed) { + seed = BUILTIN_RANDOM_SEED; + } + for (hash = seed, i = 0; i < len; i++) { + hash = (hash << 4) ^ (hash >> 28) ^ key[i]; + } + return (hash ^ (hash >> 10) ^ (hash >> 20)); +} + +/** + * The one-at-a-time hash. + * + * This is an implementation of the one-at-a-time hash. It is similar + * to spifhash_rotating(). It uses 9n+9 instructions to hash a key of n + * bytes. A full 32-bit key is produced. No funneling weaknesses are + * known. + * + * @param key The arbitrary-length key. + * @param len The length of the key, in bytes. + * @param seed An arbitrary seed value. + * @return A 32-bit hash value. + * + */ +spif_uint32_t +spifhash_one_at_a_time(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed) +{ + spif_uint32_t hash, i; + + if (!seed) { + seed = BUILTIN_RANDOM_SEED; + } + for (hash = seed, i = 0; i < len; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/** + * The FNV hash. + * + * This is the Fowler/Noll/Vo (FNV) hash. This code is a modified + * version of that which appears at + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * @param key The arbitrary-length key. + * @param len The length of the key, in bytes. + * @param seed An arbitrary seed value. + * @return A 32-bit hash value. + * + */ +spif_uint32_t +spifhash_fnv(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed) +{ + spif_uint8_t *key_end = key + len; + spif_uint32_t hash; + + if (!seed) { + seed = SPIF_CAST(uint32) 0x811c9dc5; /* FNV-1a 32-bit init. */ + } + for (hash = seed; key < key_end; key++) { + hash ^= SPIF_CAST(uint32) (*key); + +#ifdef __GNUC__ + hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); +#else + hash *= SPIF_CAST(uint32) 0x01000193; /* 32-bit FNV-1 prime. */ +#endif + } + + return hash; +} diff --git a/test/Makefile.am b/test/Makefile.am index 5b97149..1ad7342 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -16,6 +16,8 @@ test: libast-test $(top_builddir)/test/libast-test perf: libast-perf +# Uncomment the following to run under gdb +# vi $(top_builddir)/test/libast-perf $(top_builddir)/test/libast-perf $(PERFOPTS) .PHONY: test diff --git a/test/perf.c b/test/perf.c index 789213e..959d114 100644 --- a/test/perf.c +++ b/test/perf.c @@ -38,6 +38,7 @@ size_t rep_cnt = 0, rep_mult = 100; int perf_macros(void); int perf_strings(void); +int perf_hashes(void); int perf_options(void); int perf_obj(void); int perf_str(void); @@ -131,6 +132,265 @@ perf_strings(void) } int +perf_hashes(void) +{ + spif_uint8_t i; + + for (i = 0; i < 6; i++) { + spifhash_func_t hash_func; + spif_uint32_t key_length, hash, j; + + if (i == 0) { + PERF_NOTICE("*** Profiling Jenkins hash:"); + hash_func = spifhash_jenkins; + } else if (i == 1) { + PERF_NOTICE("*** Profiling Jenkins32 hash:"); + hash_func = spifhash_jenkins32; + } else if (i == 2) { +#if WORDS_BIGENDIAN + continue; +#else + PERF_NOTICE("*** Profiling JenkinsLE hash:"); + hash_func = spifhash_jenkinsLE; +#endif + } else if (i == 3) { + PERF_NOTICE("*** Profiling rotating hash:"); + hash_func = spifhash_rotating; + } else if (i == 4) { + PERF_NOTICE("*** Profiling one-at-a-time hash:"); + hash_func = spifhash_one_at_a_time; + } else if (i == 5) { + PERF_NOTICE("*** Profiling FNV hash:"); + hash_func = spifhash_fnv; + } + + /*** + *** The below tests hash speed for small, medium, and + *** large randomly-generated keys. + ***/ + PERF_SET_REPS(1000); + + /* + * Raw hash speed for fixed-length randomly-generated key. + */ +#define KEYLEN 32 + do { + spif_uint8_t buff[KEYLEN]; + + for (key_length = 0; key_length < KEYLEN; key_length++) { + buff[key_length] = SPIF_CAST(uint8) (rand() & 0xff); + } + PERF_BEGIN("hash speed for 32-byte key"); + if (hash_func == spifhash_jenkins32) { + PERF_TEST(hash = hash_func(buff, KEYLEN / 4, hash);); + } else { + PERF_TEST(hash = hash_func(buff, KEYLEN, hash);); + } + PERF_END(); + } while (0); +#undef KEYLEN + +#define KEYLEN 512 + do { + spif_uint8_t buff[KEYLEN]; + + for (key_length = 0; key_length < KEYLEN; key_length++) { + buff[key_length] = SPIF_CAST(uint8) (rand() & 0xff); + } + PERF_BEGIN("hash speed for 512-byte key"); + if (hash_func == spifhash_jenkins32) { + PERF_TEST(hash = hash_func(buff, KEYLEN / 4, hash);); + } else { + PERF_TEST(hash = hash_func(buff, KEYLEN, hash);); + } + PERF_END(); + } while (0); +#undef KEYLEN + +#define KEYLEN 8192 + do { + spif_uint8_t buff[KEYLEN]; + + for (key_length = 0; key_length < KEYLEN; key_length++) { + buff[key_length] = SPIF_CAST(uint8) (rand() & 0xff); + } + PERF_BEGIN("hash speed for 8192-byte key"); + if (hash_func == spifhash_jenkins32) { + PERF_TEST(hash = hash_func(buff, KEYLEN / 4, hash);); + } else { + PERF_TEST(hash = hash_func(buff, KEYLEN, hash);); + } + PERF_END(); + } while (0); +#undef KEYLEN + + /*** + *** These tests measure collision rate and distribution + *** across a varying number of hash buckets for varying + *** numbers of randomly-generated alphabetical keys. + ***/ + +#define KEYLEN 8 +#define HASH_BITS 16 + /* + * Hash distribution for a 16-bit hash and an 8-character random word. + */ +#define KEYCNT 64 + do { + spif_uint32_t buckets[SPIFHASH_SIZE(HASH_BITS)]; + spif_uint32_t c_total, c_min = SPIF_CAST(uint32) (-1), c_max = 0; + double c_prob, avg_size; + + MEMSET(buckets, 0, sizeof(buckets)); + printf("Profiling %lu-bucket distribution for %lu-bit hashes of %lu %lu-byte random words...", + SPIF_CAST_C(unsigned long) SPIFHASH_SIZE(HASH_BITS), + SPIF_CAST_C(unsigned long) HASH_BITS, + SPIF_CAST_C(unsigned long) KEYCNT, + SPIF_CAST_C(unsigned long) KEYLEN); + for (j = 0; j < KEYCNT; j++) { + spif_uint8_t buff[KEYLEN]; + + for (key_length = 0; key_length < KEYLEN; key_length++) { + buff[key_length] = SPIF_CAST(uint8) (rand() % 26 + 'a'); + } + if (hash_func == spifhash_jenkins32) { + hash = hash_func(buff, KEYLEN / 4, hash); + } else { + hash = hash_func(buff, KEYLEN, hash); + } + buckets[((hash >> HASH_BITS) ^ (hash & 0xffff))]++; + } + tnum = j; + for (j = 0, c_total = 0, avg_size = 0; j < SPIFHASH_SIZE(HASH_BITS); j++) { + spif_uint32_t c; + + c = buckets[j]; + AT_LEAST(c_max, c); + AT_MOST(c_min, c); + + if (c) { + if (--c) { + c_total += c; + } + } + } + avg_size /= SPIFHASH_SIZE(HASH_BITS); + c_prob = ((SPIF_CAST_C(double) c_total) / KEYCNT * 100.0); + printf("%lu collisions (%lu/%lu/%.5le), %.2lf%% probability.\n", + SPIF_CAST_C(unsigned long) c_total, + SPIF_CAST_C(unsigned long) c_min, + SPIF_CAST_C(unsigned long) c_max, + avg_size, c_prob); + } while (0); +#undef KEYCNT + +#define KEYCNT 2048 + do { + spif_uint32_t buckets[SPIFHASH_SIZE(HASH_BITS)]; + spif_uint32_t c_total, c_min = SPIF_CAST(uint32) (-1), c_max = 0; + double c_prob, avg_size; + + MEMSET(buckets, 0, sizeof(buckets)); + printf("Profiling %lu-bucket distribution for %lu-bit hashes of %lu %lu-byte random words...", + SPIF_CAST_C(unsigned long) SPIFHASH_SIZE(HASH_BITS), + SPIF_CAST_C(unsigned long) HASH_BITS, + SPIF_CAST_C(unsigned long) KEYCNT, + SPIF_CAST_C(unsigned long) KEYLEN); + for (j = 0; j < KEYCNT; j++) { + spif_uint8_t buff[KEYLEN]; + + for (key_length = 0; key_length < KEYLEN; key_length++) { + buff[key_length] = SPIF_CAST(uint8) (rand() % 26 + 'a'); + } + if (hash_func == spifhash_jenkins32) { + hash = hash_func(buff, KEYLEN / 4, hash); + } else { + hash = hash_func(buff, KEYLEN, hash); + } + buckets[((hash >> HASH_BITS) ^ (hash & 0xffff))]++; + } + tnum = j; + for (j = 0, c_total = 0, avg_size = 0; j < SPIFHASH_SIZE(HASH_BITS); j++) { + spif_uint32_t c; + + c = buckets[j]; + AT_LEAST(c_max, c); + AT_MOST(c_min, c); + + if (c) { + if (--c) { + c_total += c; + } + } + } + avg_size /= SPIFHASH_SIZE(HASH_BITS); + c_prob = ((SPIF_CAST_C(double) c_total) / KEYCNT * 100.0); + printf("%lu collisions (%lu/%lu/%.5le), %.2lf%% probability.\n", + SPIF_CAST_C(unsigned long) c_total, + SPIF_CAST_C(unsigned long) c_min, + SPIF_CAST_C(unsigned long) c_max, + avg_size, c_prob); + } while (0); +#undef KEYCNT + +#define KEYCNT 32768 + do { + spif_uint32_t buckets[SPIFHASH_SIZE(HASH_BITS)]; + spif_uint32_t c_total, c_min = SPIF_CAST(uint32) (-1), c_max = 0; + double c_prob, avg_size; + + MEMSET(buckets, 0, sizeof(buckets)); + printf("Profiling %lu-bucket distribution for %lu-bit hashes of %lu %lu-byte random words...", + SPIF_CAST_C(unsigned long) SPIFHASH_SIZE(HASH_BITS), + SPIF_CAST_C(unsigned long) HASH_BITS, + SPIF_CAST_C(unsigned long) KEYCNT, + SPIF_CAST_C(unsigned long) KEYLEN); + for (j = 0; j < KEYCNT; j++) { + spif_uint8_t buff[KEYLEN]; + + for (key_length = 0; key_length < KEYLEN; key_length++) { + buff[key_length] = SPIF_CAST(uint8) (rand() % 26 + 'a'); + } + if (hash_func == spifhash_jenkins32) { + hash = hash_func(buff, KEYLEN / 4, hash); + } else { + hash = hash_func(buff, KEYLEN, hash); + } + buckets[((hash >> HASH_BITS) ^ (hash & 0xffff))]++; + } + tnum = j; + for (j = 0, c_total = 0, avg_size = 0; j < SPIFHASH_SIZE(HASH_BITS); j++) { + spif_uint32_t c; + + c = buckets[j]; + AT_LEAST(c_max, c); + AT_MOST(c_min, c); + + if (c) { + if (--c) { + c_total += c; + } + } + } + avg_size /= SPIFHASH_SIZE(HASH_BITS); + c_prob = ((SPIF_CAST_C(double) c_total) / KEYCNT * 100.0); + printf("%lu collisions (%lu/%lu/%.5le), %.2lf%% probability.\n", + SPIF_CAST_C(unsigned long) c_total, + SPIF_CAST_C(unsigned long) c_min, + SPIF_CAST_C(unsigned long) c_max, + avg_size, c_prob); + } while (0); +#undef KEYCNT + +#undef HASH_BITS + +#undef KEYLEN + } + PERF_ENDED("hash function"); + return 0; +} + +int perf_options(void) { spif_uint32_t test_flag_var = 0; @@ -329,7 +589,7 @@ perf_str(void) teststr = spif_str_new_from_ptr("abcdefg"); PERF_BEGIN("spif_str_clear() function"); - PERF_TEST(spif_str_clear(teststr, SPIF_CAST_C(char) (rand() % 256));); + PERF_TEST(spif_str_clear(teststr, SPIF_CAST_C(char) (rand() & 0xff));); PERF_END(); spif_str_del(teststr); @@ -447,30 +707,30 @@ perf_list(void) for (i = 0; i < 3; i++) { if (i == 0) { - PERF_NOTICE("Testing list interface class, linked_list instance:"); + PERF_NOTICE("*** Profiling list interface class, linked_list instance:"); testlist = SPIF_LIST_NEW(linked_list); } else if (i == 1) { - PERF_NOTICE("Testing list interface class, dlinked_list instance:"); + PERF_NOTICE("*** Profiling list interface class, dlinked_list instance:"); testlist = SPIF_LIST_NEW(dlinked_list); } else if (i == 2) { - PERF_NOTICE("Testing list interface class, array instance:"); + PERF_NOTICE("*** Profiling list interface class, array instance:"); testlist = SPIF_LIST_NEW(array); } else if (i == 3) { } PERF_BEGIN("SPIF_LIST_APPEND() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); PERF_TEST(SPIF_LIST_APPEND(testlist, spif_str_new_from_ptr(buff));); PERF_END(); PERF_BEGIN("SPIF_LIST_PREPEND() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); PERF_TEST(SPIF_LIST_PREPEND(testlist, spif_str_new_from_ptr(buff));); PERF_END(); s = spif_str_new(); PERF_BEGIN("SPIF_LIST_CONTAINS() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); spif_str_init_from_ptr(s, buff); PERF_TEST(SPIF_LIST_CONTAINS(testlist, s)); spif_str_done(s); @@ -489,7 +749,7 @@ perf_list(void) s = spif_str_new(); PERF_BEGIN("SPIF_LIST_INDEX() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); spif_str_init_from_ptr(s, buff); PERF_TEST(SPIF_LIST_INDEX(testlist, s)); spif_str_done(s); @@ -497,20 +757,20 @@ perf_list(void) spif_str_del(s); PERF_BEGIN("SPIF_LIST_INSERT() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); PERF_TEST(SPIF_LIST_INSERT(testlist, spif_str_new_from_ptr(buff));); PERF_END(); len = SPIF_LIST_COUNT(testlist); PERF_BEGIN("SPIF_LIST_INSERT_AT() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); idx = rand() % len; PERF_TEST(SPIF_LIST_INSERT_AT(testlist, spif_str_new_from_ptr(buff), idx);); PERF_END(); s = spif_str_new(); PERF_BEGIN("SPIF_LIST_REMOVE() macro"); - buff[0] = SPIF_CAST_C(char) (rand() % 256); + buff[0] = SPIF_CAST_C(char) (rand() & 0xff); spif_str_init_from_ptr(s, buff); PERF_TEST(s2 = SPIF_CAST(str) SPIF_LIST_REMOVE(testlist, s); if (!SPIF_STR_ISNULL(s2)) {spif_str_del(s2);}); spif_str_done(s); @@ -553,6 +813,9 @@ main(int argc, char *argv[]) if ((ret = perf_strings()) != 0) { return ret; } + if ((ret = perf_hashes()) != 0) { + return ret; + } if ((ret = perf_options()) != 0) { return ret; } diff --git a/test/test.c b/test/test.c index f59625e..bc8ea9e 100644 --- a/test/test.c +++ b/test/test.c @@ -218,141 +218,220 @@ test_strings(void) return 0; } -#define HASHSTATE 1 -#define HASHLEN 1 #define MAXPAIR 80 -#define MAXLEN 70 +#define MAXLEN 80 int test_hash_functions(void) { - TEST_BEGIN("Jenkins hash functions"); - { - spif_uint32_t buf[256]; - spif_uint32_t i; - spif_uint32_t h = 0; + spif_uint8_t i; + + for (i = 0; i < 6; i++) { + spifhash_func_t hash_func; + spif_uint32_t key_length, input_byte, trials, successes = 0, hash, ref_hash; + spif_uint8_t buff[MAXLEN + 20], *pbuff; + spif_uint8_t align1[] = "This has got the amazing aroma of bovine fecal matter..."; + spif_uint8_t align2[] = "xThis has got the amazing aroma of bovine fecal matter..."; + spif_uint8_t align3[] = "xxThis has got the amazing aroma of bovine fecal matter..."; + spif_uint8_t align4[] = "xxxThis has got the amazing aroma of bovine fecal matter..."; + spif_uint32_t j; - for (i = 0; i < 256; i++) { - h = jenkins_hash(buf, i, h); - } - TEST_FAIL_IF(h == 0); - for (i = 0; i < 256; i++) { - h = jenkins_hash32(buf, i, h); + if (i == 0) { + TEST_NOTICE("*** Testing Jenkins hash:"); + hash_func = spifhash_jenkins; + } else if (i == 1) { + TEST_NOTICE("*** Testing Jenkins32 hash:"); + hash_func = spifhash_jenkins32; + } else if (i == 2) { +#if WORDS_BIGENDIAN + continue; +#else + TEST_NOTICE("*** Testing JenkinsLE hash:"); + hash_func = spifhash_jenkinsLE; +#endif + } else if (i == 3) { + TEST_NOTICE("*** Testing rotating hash:"); + hash_func = spifhash_rotating; + } else if (i == 4) { + TEST_NOTICE("*** Testing one-at-a-time hash:"); + hash_func = spifhash_one_at_a_time; + } else if (i == 5) { + TEST_NOTICE("*** Testing FNV hash:"); + hash_func = spifhash_fnv; } - TEST_FAIL_IF(h == 0); - } - { - spif_uint8_t qa[MAXLEN + 1], qb[MAXLEN + 2], *a = &qa[0], *b = &qb[1]; - spif_uint32_t c[HASHSTATE], d[HASHSTATE], i, j = 0, k, l, m, z; - spif_uint32_t e[HASHSTATE], f[HASHSTATE], g[HASHSTATE], h[HASHSTATE]; - spif_uint32_t x[HASHSTATE], y[HASHSTATE]; - spif_uint32_t hlen; - - /*printf("No more than %d trials should ever be needed\n", MAXPAIR / 2);*/ - for (hlen = 0; hlen < MAXLEN; hlen++) { - z = 0; - for (i = 0; i < hlen; i++) { - /*----------------------- for each input byte, */ - for (j = 0; j < 8; j++) { - /*------------------------ for each input bit, */ - for (m = 1; m < 8; m++) { - /*------------ for several possible seeds, */ - for (l = 0; l < HASHSTATE; l++) - e[l] = f[l] = g[l] = h[l] = x[l] = y[l] = ~(SPIF_CAST(uint32) 0); - - /*---- check that every output bit is affected by that input bit */ - for (k = 0; k < MAXPAIR; k += 2) { - spif_uint32_t finished = 1; - - /* keys have one bit different */ - for (l = 0; l < hlen + 1; l++) { - a[l] = b[l] = (spif_uint8_t) 0; + + TEST_BEGIN("effect of every input bit on every output bit"); + for (key_length = 0; key_length < MAXLEN; key_length++) { + /* For each key length up to 70 bytes... */ + + if ((hash_func == spifhash_jenkins32) + && (key_length % 4)) { + successes++; + continue; + } + trials = 0; + for (input_byte = 0; input_byte < key_length; input_byte++) { + /* ...for each input byte... */ + spif_uint32_t input_bit; + + for (input_bit = 0; input_bit < 8; input_bit++) { + /* ...for each input bit... */ + spif_uint32_t seed; + + for (seed = 1; seed < 8; seed++) { + /* ...use several possible seeds... */ + spif_uint32_t e, f, g, h, x, y; + spif_uint32_t bit_pair; + + /* Initialize to ~0 (0xffffffff). */ + e = f = g = h = x = y = ~(SPIF_CAST(uint32) 0); + + /* ...to make sure every output bit is affected by every input bit. */ + for (bit_pair = 0; bit_pair < MAXPAIR; bit_pair += 2) { + spif_uint8_t buff1[MAXLEN + 1], buff2[MAXLEN + 2]; + spif_uint8_t *pbuff1 = &buff1[0], *pbuff2 = &buff2[1]; + spif_uint32_t hash1, hash2; + + for (j = 0; j < key_length + 1; j++) { + /* Initialize keys to all zeros. */ + pbuff1[j] = pbuff2[j] = (spif_uint8_t) 0; } - /* have a and b be two keys differing in only one bit */ - a[i] ^= (k << j); - a[i] ^= (k >> (8 - j)); - c[0] = jenkins_hash(a, hlen, m); - b[i] ^= ((k + 1) << j); - b[i] ^= ((k + 1) >> (8 - j)); - d[0] = jenkins_hash(b, hlen, m); - /* check every bit is 1, 0, set, and not set at least once */ - for (l = 0; l < HASHSTATE; l++) { - e[l] &= (c[l] ^ d[l]); - f[l] &= ~(c[l] ^ d[l]); - g[l] &= c[l]; - h[l] &= ~c[l]; - x[l] &= d[l]; - y[l] &= ~d[l]; - if (e[l] | f[l] | g[l] | h[l] | x[l] | y[l]) - finished = 0; + + /* Then make them differ by exactly one bit, the input_bit. + bit_pair will always end in 0, so bit_pair + 1 will always + end in 1. It's then shifted by input_bit to test the + current bit to test all 8 of the lowest bits in sequence. */ + pbuff1[input_byte] ^= (bit_pair << input_bit); + pbuff1[input_byte] ^= (bit_pair >> (8 - input_bit)); + pbuff2[input_byte] ^= ((bit_pair + 1) << input_bit); + pbuff2[input_byte] ^= ((bit_pair + 1) >> (8 - input_bit)); + + /* Hash them. */ + if (hash_func == spifhash_jenkins32) { + hash1 = hash_func(pbuff1, key_length / 4, seed); + hash2 = hash_func(pbuff2, key_length / 4, seed); + } else { + hash1 = hash_func(pbuff1, key_length, seed); + hash2 = hash_func(pbuff2, key_length, seed); } - if (finished) + + /* Make sure every bit is 1 or 0 at least once. */ + e &= (hash1 ^ hash2); f &= ~(hash1 ^ hash2); + g &= hash1; h &= ~hash1; + x &= hash2; y &= ~hash2; + if (!(e | f | g | h | x | y)) { + /* They're all 0. That means they've all changed at least once. */ break; + } } - if (k > z) - z = k; - if (k == MAXPAIR) { + if (bit_pair > trials) { + trials = bit_pair; + } + if (bit_pair == MAXPAIR) { +#if UNUSED_BLOCK printf("Some bit didn't change: "); - printf("%.8lx %.8lx %.8lx %.8lx %.8lx %.8lx ", e[0], f[0], g[0], h[0], x[0], y[0]); - printf("i %ld j %ld m %ld len %ld\n", i, j, m, hlen); + printf("%.8lx %.8lx %.8lx %.8lx %.8lx %.8lx ", + SPIF_CAST_C(unsigned long) e, + SPIF_CAST_C(unsigned long) f, + SPIF_CAST_C(unsigned long) g, + SPIF_CAST_C(unsigned long) h, + SPIF_CAST_C(unsigned long) x, + SPIF_CAST_C(unsigned long) y); + printf("input_byte %lu input_bit %lu seed %lu key length %lu\n", + SPIF_CAST_C(unsigned long) input_byte, + SPIF_CAST_C(unsigned long) input_bit, + SPIF_CAST_C(unsigned long) seed, + SPIF_CAST_C(unsigned long) key_length); +#endif } - if (z == MAXPAIR) + if (trials == MAXPAIR) { + /* Easy way to break out of a crapload of for loops. */ goto done; + } } } } done: - if (z < MAXPAIR) { - printf("Mix success %2ld bytes %2ld seeds ", i, m); - printf("required %ld trials\n", z / 2); + if (trials < MAXPAIR) { + successes++; +#if UNUSED_BLOCK + printf("Mix success: %2lu-byte key required %2lu trials (%lu so far).\n", + SPIF_CAST_C(unsigned long) input_byte, + SPIF_CAST_C(unsigned long) trials / 2, + SPIF_CAST_C(unsigned long) successes); +#endif } } - printf("\n"); - } - { - spif_uint8_t buf[MAXLEN + 20], *b; - spif_uint32_t len; - spif_uint8_t q[] = "This is the time for all good men to come to the aid of their country"; - spif_uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country"; - spif_uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country"; - spif_uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country"; - spif_uint32_t h, i, j, ref, x, y; - - printf("Endianness. These should all be the same:\n"); - printf("%.8lx\n", jenkins_hash(q, sizeof(q) - 1, SPIF_CAST(uint32) 0)); - printf("%.8lx\n", jenkins_hash(qq + 1, sizeof(q) - 1, SPIF_CAST(uint32) 0)); - printf("%.8lx\n", jenkins_hash(qqq + 2, sizeof(q) - 1, SPIF_CAST(uint32) 0)); - printf("%.8lx\n", jenkins_hash(qqqq + 3, sizeof(q) - 1, SPIF_CAST(uint32) 0)); - printf("\n"); - for (h = 0, b = buf + 1; h < 8; h++, b++) { - for (i = 0; i < MAXLEN; i++) { - len = i; - for (j = 0; j < i; j++) - *(b + j) = 0; - - /* these should all be equal */ - ref = jenkins_hash(b, len, SPIF_CAST(uint32) 1); - *(b + i) = (spif_uint8_t) ~ 0; - *(b - 1) = (spif_uint8_t) ~ 0; - x = jenkins_hash(b, len, SPIF_CAST(uint32) 1); - y = jenkins_hash(b, len, SPIF_CAST(uint32) 1); - if ((ref != x) || (ref != y)) { - printf("alignment error: %.8lx %.8lx %.8lx %ld %ld\n", ref, x, y, h, i); - } + printf("%.2f%% mix success rate in %d key lengths...", + (100.0 * successes / key_length), key_length); + TEST_FAIL_IF(successes == 0); + TEST_PASS(); + + /* Make sure nothing but the key is hashed, regardless of alignment. */ + TEST_BEGIN("endian cleanliness"); + key_length = CONST_STRLEN(align1); + if (hash_func == spifhash_jenkins32) { + if (key_length % 4) { + TEST_FAIL_IF(key_length); + } else { + key_length /= 4; } } - } - { - spif_uint8_t buf[1]; - spif_uint32_t h, i, state[HASHSTATE]; + ref_hash = hash_func(align1, key_length, 0); + hash = hash_func(align2 + 1, key_length, 0); + /*printf("Reference hash 0x%08x, hash 0x%08x for length %lu\n", ref_hash, hash, key_length);*/ + TEST_FAIL_IF(hash != ref_hash); + hash = hash_func(align3 + 2, key_length, 0); + TEST_FAIL_IF(hash != ref_hash); + hash = hash_func(align4 + 3, key_length, 0); + TEST_FAIL_IF(hash != ref_hash); + + for (j = 0, pbuff = buff + 1; j < 8; j++, pbuff++) { + for (key_length = 0; key_length < MAXLEN; key_length++) { + if ((hash_func == spifhash_jenkins32) + && (key_length % 4)) { + continue; + } + MEMSET(buff, 0, sizeof(buff)); + if (hash_func == spifhash_jenkins32) { + ref_hash = hash_func(pbuff, key_length / 4, 1); + } else { + ref_hash = hash_func(pbuff, key_length, 1); + } + *(pbuff + key_length) = ~(SPIF_CAST(uint8) 0); + *(pbuff - 1) = ~(SPIF_CAST(uint8) 0); + if (hash_func == spifhash_jenkins32) { + hash = hash_func(pbuff, key_length / 4, 1); + } else { + hash = hash_func(pbuff, key_length, 1); + } + /*printf("Reference hash 0x%08x, hash 0x%08x for length %lu\n", ref_hash, hash, key_length);*/ + TEST_FAIL_IF(hash != ref_hash); + } + } + TEST_PASS(); - buf[0] = ~0; - for (i = 0; i < HASHSTATE; i++) - state[i] = 1; - printf("These should all be different\n"); - for (i = 0, h = 0; i < 8; i++) { - h = jenkins_hash(buf, SPIF_CAST(uint32) 0, h); - printf("%2ld 0-byte strings, hash is %.8lx\n", i, h); + /* We cannot test the rotating hash or the FNV hash here. The + rotating hash repeats after 4 zero-length keys. The FNV + hash generates constant hash values for zero-length keys. */ + if ((hash_func != spifhash_rotating) + && (hash_func != spifhash_fnv)) { + spif_uint32_t null_hashes[8]; + spif_uint8_t one_byte; + + TEST_BEGIN("hashes of empty strings"); + one_byte = ~0; + for (j = 0, hash = 0; j < 8; j++) { + spif_uint32_t k; + + hash = hash_func(&one_byte, SPIF_CAST(uint32) 0, hash); + null_hashes[j] = hash; + /*printf("Empty string hash %lu is 0x%08x\n", j, hash);*/ + for (k = j - 1; k < 8; k--) { + TEST_FAIL_IF(null_hashes[j] == null_hashes[k]); + } + } + TEST_PASS(); } } @@ -938,13 +1017,13 @@ test_list(void) for (i = 0; i < 3; i++) { if (i == 0) { - TEST_NOTICE("Testing list interface class, linked_list instance:"); + TEST_NOTICE("*** Testing list interface class, linked_list instance:"); testlist = SPIF_LIST_NEW(linked_list); } else if (i == 1) { - TEST_NOTICE("Testing list interface class, dlinked_list instance:"); + TEST_NOTICE("*** Testing list interface class, dlinked_list instance:"); testlist = SPIF_LIST_NEW(dlinked_list); } else if (i == 2) { - TEST_NOTICE("Testing list interface class, array instance:"); + TEST_NOTICE("*** Testing list interface class, array instance:"); testlist = SPIF_LIST_NEW(array); } else if (i == 3) { } @@ -1204,13 +1283,13 @@ test_vector(void) for (i = 0; i < 3; i++) { if (i == 0) { - TEST_NOTICE("Testing vector interface class, linked_list instance:"); + TEST_NOTICE("*** Testing vector interface class, linked_list instance:"); testvector = SPIF_VECTOR_NEW(linked_list); } else if (i == 1) { - TEST_NOTICE("Testing vector interface class, dlinked_list instance:"); + TEST_NOTICE("*** Testing vector interface class, dlinked_list instance:"); testvector = SPIF_VECTOR_NEW(dlinked_list); } else if (i == 2) { - TEST_NOTICE("Testing vector interface class, array instance:"); + TEST_NOTICE("*** Testing vector interface class, array instance:"); testvector = SPIF_VECTOR_NEW(array); } else if (i == 3) { } |