From d8a373e85af372267334ba1b4b42cb9046e376ae Mon Sep 17 00:00:00 2001 From: Michael Jennings Date: Fri, 23 Jan 2004 01:44:51 +0000 Subject: Thu Jan 22 20:42:40 2004 Michael Jennings (mej) Added a few new hashes. Fixed a typo in the old hashes. LibAST-ized the hash tests and made new performance tests. Anybody care to comment on the validity of my performance tests? All 6 algorithms end up with pretty much the same results. Do I need to use a dictionary instead of random "words?" That's kinda what I'm thinking.... SVN revision: 8623 --- src/builtin_hashes.c | 193 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 154 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/builtin_hashes.c b/src/builtin_hashes.c index ef07ea7..eeb44a1 100644 --- a/src/builtin_hashes.c +++ b/src/builtin_hashes.c @@ -29,6 +29,8 @@ static const char cvs_ident[] = "$Id$"; #include +#define BUILTIN_RANDOM_SEED (SPIF_CAST(uint32) 0xf721b64d) + /* * Bob Jenkins' hash algorithm as published in December 1996. Public * domain. See http://burtleburtle.net/bob/hash/ @@ -43,8 +45,8 @@ static const char cvs_ident[] = "$Id$"; * instructions for an n-byte key. * * For a hash value of w bits, the returned value should be - * bitwise-AND'd with JENKINS_HASH_MASK(w), and the hash table should - * have JENKINS_HASH_SIZE(w) buckets. + * bitwise-AND'd with SPIFHASH_MASK(w), and the hash table should + * have SPIFHASH_SIZE(w) buckets. * * @param key Pointer to bitstream holding key. * @param length Number of bytes in bitstream. @@ -54,12 +56,12 @@ static const char cvs_ident[] = "$Id$"; * */ spif_uint32_t -jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) +spifhash_jenkins(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) { register spif_uint32_t a, b, c, len; len = length; - a = b = 0xf721b64d; /* This can be any 32-bit value. */ + a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */ c = seed; /* The loop below handles most of the key (all but the last @@ -68,7 +70,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24)); b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24)); c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24)); - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); key += 12; len -= 12; } @@ -90,7 +92,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register case 1: a += key[0]; /* case 0: nothing left to add */ } - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); return c; } @@ -100,10 +102,10 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register * * This function hashes a series of 32-bit unsigned integers of a * given length into a 32-bit hash value suitable for use in hash - * tables. This hash is basically identical to jenkins_hash(), except + * tables. This hash is basically identical to spifhash_jenkins(), except * that the key length must be a whole number of 32-bit dword's, and * the length is given in spif_uint32_t's, not bytes. It is much - * faster than jenkins_hash(), so if padding keys to 32 bit chunks is + * faster than spifhash_jenkins(), so if padding keys to 32 bit chunks is * inexpensive, this function is probably preferable. * * @param key Pointer to bitstream holding key. @@ -114,22 +116,23 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register * */ spif_uint32_t -jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, register spif_uint32_t seed) +spifhash_jenkins32(spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) { register spif_uint32_t a, b, c, len; + register spif_uint32_t *key_dword = SPIF_CAST_PTR(uint32) key; len = length; - a = b = 0xf721b64d; /* This can be any 32-bit value. */ + a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */ c = seed; /* The loop below handles most of the key (all but the last length % 3 uint32's). */ while (len >= 3) { - a += key[0]; - b += key[1]; - c += key[2]; - JENKINS_MIX(a, b, c); - key += 3; + a += key_dword[0]; + b += key_dword[1]; + c += key_dword[2]; + SPIFHASH_JENKINS_MIX(a, b, c); + key_dword += 3; len -= 3; } @@ -137,11 +140,11 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis uint32's. All cases drop through to the next case. */ c += length; switch (len) { - case 2: b += key[1]; - case 1: a += key[0]; + case 2: b += key_dword[1]; + case 1: a += key_dword[0]; /* case 0: nothing left to add */ } - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); return c; } @@ -152,9 +155,9 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis * * This function hashes a bitstream of a given length into a 32-bit * hash value suitable for use in hash tables. This hash is basically - * identical to jenkins_hash(), except that it only works on + * identical to spifhash_jenkins(), except that it only works on * little-endian machines (e.g., Intel x86 and VAX). It is faster - * than jenkins_hash(), so it should be preferred on little-endian + * than spifhash_jenkins(), so it should be preferred on little-endian * systems. * * @param key Pointer to bitstream holding key. @@ -165,12 +168,12 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis * */ spif_uint32_t -jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) +spifhash_jenkinsLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed) { register spif_uint32_t a, b, c, len; len = length; - a = b = 0xf721b64d; /* This can be any 32-bit value. */ + a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */ c = seed; /* The loop below handles most of the key (all but the last @@ -181,7 +184,7 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24)); b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24)); c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24)); - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); key += 12; len -= 12; } @@ -189,11 +192,11 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist /* 32-bit aligned. Use speedier method. */ while (len >= 12) { /* These three lines are the only ones which differ from - jenkins_hash(). */ - a += *key; + spifhash_jenkins(). */ + a += *(SPIF_CAST_PTR(uint32) key); b += *(SPIF_CAST_PTR(uint32) (key + 4)); c += *(SPIF_CAST_PTR(uint32) (key + 8)); - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); key += 12; len -= 12; } @@ -203,21 +206,133 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist bytes. All cases drop through to the next case. */ c += length; switch (len) { - case 11: c += (SPIF_CAST(uint32) key[10] << 24); - case 10: c += (SPIF_CAST(uint32) key[9] << 16); - case 9: c += (SPIF_CAST(uint32) key[8] << 8); - case 8: b += (SPIF_CAST(uint32) key[7] << 24); - case 7: b += (SPIF_CAST(uint32) key[6] << 16); - case 6: b += (SPIF_CAST(uint32) key[5] << 8); - case 5: b += key[4]; - case 4: a += (SPIF_CAST(uint32) key[3] << 24); - case 3: a += (SPIF_CAST(uint32) key[2] << 16); - case 2: a += (SPIF_CAST(uint32) key[1] << 8); - case 1: a += key[0]; - /* case 0: nothing left to add */ + case 11: + c += (SPIF_CAST(uint32) key[10] << 24); + case 10: + c += (SPIF_CAST(uint32) key[9] << 16); + case 9: + c += (SPIF_CAST(uint32) key[8] << 8); + case 8: + b += (SPIF_CAST(uint32) key[7] << 24); + case 7: + b += (SPIF_CAST(uint32) key[6] << 16); + case 6: + b += (SPIF_CAST(uint32) key[5] << 8); + case 5: + b += key[4]; + case 4: + a += (SPIF_CAST(uint32) key[3] << 24); + case 3: + a += (SPIF_CAST(uint32) key[2] << 16); + case 2: + a += (SPIF_CAST(uint32) key[1] << 8); + case 1: + a += key[0]; + /* case 0: nothing left to add */ } - JENKINS_MIX(a, b, c); + SPIFHASH_JENKINS_MIX(a, b, c); return c; } #endif + + +/** + * The rotating hash. + * + * This is an implementation of a rotating hash algorithm with a + * slight modification so that it can be used with a 32-bit bitmask + * and a power-of-two table size (so as to avoid the expensive + * "hash % prime" step). + * + * This algorithm uses 8n+6 instructions to hash a key of n bytes. + * The mix step is a left bitwise rotation by 4, so systems with a + * rotate instruction will save 2 instructions per loop. + * + * @param key The arbitrary-length key. + * @param len The length of the key, in bytes. + * @param seed An arbitrary seed value. + * @return A 32-bit hash value. + * + */ +spif_uint32_t +spifhash_rotating(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed) +{ + spif_uint32_t hash, i; + + if (!seed) { + seed = BUILTIN_RANDOM_SEED; + } + for (hash = seed, i = 0; i < len; i++) { + hash = (hash << 4) ^ (hash >> 28) ^ key[i]; + } + return (hash ^ (hash >> 10) ^ (hash >> 20)); +} + +/** + * The one-at-a-time hash. + * + * This is an implementation of the one-at-a-time hash. It is similar + * to spifhash_rotating(). It uses 9n+9 instructions to hash a key of n + * bytes. A full 32-bit key is produced. No funneling weaknesses are + * known. + * + * @param key The arbitrary-length key. + * @param len The length of the key, in bytes. + * @param seed An arbitrary seed value. + * @return A 32-bit hash value. + * + */ +spif_uint32_t +spifhash_one_at_a_time(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed) +{ + spif_uint32_t hash, i; + + if (!seed) { + seed = BUILTIN_RANDOM_SEED; + } + for (hash = seed, i = 0; i < len; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/** + * The FNV hash. + * + * This is the Fowler/Noll/Vo (FNV) hash. This code is a modified + * version of that which appears at + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * @param key The arbitrary-length key. + * @param len The length of the key, in bytes. + * @param seed An arbitrary seed value. + * @return A 32-bit hash value. + * + */ +spif_uint32_t +spifhash_fnv(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed) +{ + spif_uint8_t *key_end = key + len; + spif_uint32_t hash; + + if (!seed) { + seed = SPIF_CAST(uint32) 0x811c9dc5; /* FNV-1a 32-bit init. */ + } + for (hash = seed; key < key_end; key++) { + hash ^= SPIF_CAST(uint32) (*key); + +#ifdef __GNUC__ + hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); +#else + hash *= SPIF_CAST(uint32) 0x01000193; /* 32-bit FNV-1 prime. */ +#endif + } + + return hash; +} -- cgit v1.2.1