summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Jennings <mej@kainx.org>2004-01-23 01:44:51 +0000
committerMichael Jennings <mej@kainx.org>2004-01-23 01:44:51 +0000
commitd8a373e85af372267334ba1b4b42cb9046e376ae (patch)
tree336d9698d1d3ade4c412886e20785fc056aadd6b /src
parentecb629cbc11f5d2c326f95b85d041e7521a41cfb (diff)
downloadlibast-d8a373e85af372267334ba1b4b42cb9046e376ae.tar.gz
Thu Jan 22 20:42:40 2004 Michael Jennings (mej)
Added a few new hashes. Fixed a typo in the old hashes. LibAST-ized the hash tests and made new performance tests. Anybody care to comment on the validity of my performance tests? All 6 algorithms end up with pretty much the same results. Do I need to use a dictionary instead of random "words?" That's kinda what I'm thinking.... SVN revision: 8623
Diffstat (limited to 'src')
-rw-r--r--src/builtin_hashes.c193
1 files changed, 154 insertions, 39 deletions
diff --git a/src/builtin_hashes.c b/src/builtin_hashes.c
index ef07ea7..eeb44a1 100644
--- a/src/builtin_hashes.c
+++ b/src/builtin_hashes.c
@@ -29,6 +29,8 @@ static const char cvs_ident[] = "$Id$";
#include <libast_internal.h>
+#define BUILTIN_RANDOM_SEED (SPIF_CAST(uint32) 0xf721b64d)
+
/*
* Bob Jenkins' hash algorithm as published in December 1996. Public
* domain. See http://burtleburtle.net/bob/hash/
@@ -43,8 +45,8 @@ static const char cvs_ident[] = "$Id$";
* instructions for an n-byte key.
*
* For a hash value of w bits, the returned value should be
- * bitwise-AND'd with JENKINS_HASH_MASK(w), and the hash table should
- * have JENKINS_HASH_SIZE(w) buckets.
+ * bitwise-AND'd with SPIFHASH_MASK(w), and the hash table should
+ * have SPIFHASH_SIZE(w) buckets.
*
* @param key Pointer to bitstream holding key.
* @param length Number of bytes in bitstream.
@@ -54,12 +56,12 @@ static const char cvs_ident[] = "$Id$";
*
*/
spif_uint32_t
-jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
+spifhash_jenkins(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
{
register spif_uint32_t a, b, c, len;
len = length;
- a = b = 0xf721b64d; /* This can be any 32-bit value. */
+ a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */
c = seed;
/* The loop below handles most of the key (all but the last
@@ -68,7 +70,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24));
b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24));
c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24));
- JENKINS_MIX(a, b, c);
+ SPIFHASH_JENKINS_MIX(a, b, c);
key += 12;
len -= 12;
}
@@ -90,7 +92,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
case 1: a += key[0];
/* case 0: nothing left to add */
}
- JENKINS_MIX(a, b, c);
+ SPIFHASH_JENKINS_MIX(a, b, c);
return c;
}
@@ -100,10 +102,10 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
*
* This function hashes a series of 32-bit unsigned integers of a
* given length into a 32-bit hash value suitable for use in hash
- * tables. This hash is basically identical to jenkins_hash(), except
+ * tables. This hash is basically identical to spifhash_jenkins(), except
* that the key length must be a whole number of 32-bit dword's, and
* the length is given in spif_uint32_t's, not bytes. It is much
- * faster than jenkins_hash(), so if padding keys to 32 bit chunks is
+ * faster than spifhash_jenkins(), so if padding keys to 32 bit chunks is
* inexpensive, this function is probably preferable.
*
* @param key Pointer to bitstream holding key.
@@ -114,22 +116,23 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
*
*/
spif_uint32_t
-jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, register spif_uint32_t seed)
+spifhash_jenkins32(spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
{
register spif_uint32_t a, b, c, len;
+ register spif_uint32_t *key_dword = SPIF_CAST_PTR(uint32) key;
len = length;
- a = b = 0xf721b64d; /* This can be any 32-bit value. */
+ a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */
c = seed;
/* The loop below handles most of the key (all but the last
length % 3 uint32's). */
while (len >= 3) {
- a += key[0];
- b += key[1];
- c += key[2];
- JENKINS_MIX(a, b, c);
- key += 3;
+ a += key_dword[0];
+ b += key_dword[1];
+ c += key_dword[2];
+ SPIFHASH_JENKINS_MIX(a, b, c);
+ key_dword += 3;
len -= 3;
}
@@ -137,11 +140,11 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis
uint32's. All cases drop through to the next case. */
c += length;
switch (len) {
- case 2: b += key[1];
- case 1: a += key[0];
+ case 2: b += key_dword[1];
+ case 1: a += key_dword[0];
/* case 0: nothing left to add */
}
- JENKINS_MIX(a, b, c);
+ SPIFHASH_JENKINS_MIX(a, b, c);
return c;
}
@@ -152,9 +155,9 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis
*
* This function hashes a bitstream of a given length into a 32-bit
* hash value suitable for use in hash tables. This hash is basically
- * identical to jenkins_hash(), except that it only works on
+ * identical to spifhash_jenkins(), except that it only works on
* little-endian machines (e.g., Intel x86 and VAX). It is faster
- * than jenkins_hash(), so it should be preferred on little-endian
+ * than spifhash_jenkins(), so it should be preferred on little-endian
* systems.
*
* @param key Pointer to bitstream holding key.
@@ -165,12 +168,12 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis
*
*/
spif_uint32_t
-jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
+spifhash_jenkinsLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
{
register spif_uint32_t a, b, c, len;
len = length;
- a = b = 0xf721b64d; /* This can be any 32-bit value. */
+ a = b = BUILTIN_RANDOM_SEED; /* This can be any 32-bit value. */
c = seed;
/* The loop below handles most of the key (all but the last
@@ -181,7 +184,7 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist
a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24));
b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24));
c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24));
- JENKINS_MIX(a, b, c);
+ SPIFHASH_JENKINS_MIX(a, b, c);
key += 12;
len -= 12;
}
@@ -189,11 +192,11 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist
/* 32-bit aligned. Use speedier method. */
while (len >= 12) {
/* These three lines are the only ones which differ from
- jenkins_hash(). */
- a += *key;
+ spifhash_jenkins(). */
+ a += *(SPIF_CAST_PTR(uint32) key);
b += *(SPIF_CAST_PTR(uint32) (key + 4));
c += *(SPIF_CAST_PTR(uint32) (key + 8));
- JENKINS_MIX(a, b, c);
+ SPIFHASH_JENKINS_MIX(a, b, c);
key += 12;
len -= 12;
}
@@ -203,21 +206,133 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist
bytes. All cases drop through to the next case. */
c += length;
switch (len) {
- case 11: c += (SPIF_CAST(uint32) key[10] << 24);
- case 10: c += (SPIF_CAST(uint32) key[9] << 16);
- case 9: c += (SPIF_CAST(uint32) key[8] << 8);
- case 8: b += (SPIF_CAST(uint32) key[7] << 24);
- case 7: b += (SPIF_CAST(uint32) key[6] << 16);
- case 6: b += (SPIF_CAST(uint32) key[5] << 8);
- case 5: b += key[4];
- case 4: a += (SPIF_CAST(uint32) key[3] << 24);
- case 3: a += (SPIF_CAST(uint32) key[2] << 16);
- case 2: a += (SPIF_CAST(uint32) key[1] << 8);
- case 1: a += key[0];
- /* case 0: nothing left to add */
+ case 11:
+ c += (SPIF_CAST(uint32) key[10] << 24);
+ case 10:
+ c += (SPIF_CAST(uint32) key[9] << 16);
+ case 9:
+ c += (SPIF_CAST(uint32) key[8] << 8);
+ case 8:
+ b += (SPIF_CAST(uint32) key[7] << 24);
+ case 7:
+ b += (SPIF_CAST(uint32) key[6] << 16);
+ case 6:
+ b += (SPIF_CAST(uint32) key[5] << 8);
+ case 5:
+ b += key[4];
+ case 4:
+ a += (SPIF_CAST(uint32) key[3] << 24);
+ case 3:
+ a += (SPIF_CAST(uint32) key[2] << 16);
+ case 2:
+ a += (SPIF_CAST(uint32) key[1] << 8);
+ case 1:
+ a += key[0];
+ /* case 0: nothing left to add */
}
- JENKINS_MIX(a, b, c);
+ SPIFHASH_JENKINS_MIX(a, b, c);
return c;
}
#endif
+
+
+/**
+ * The rotating hash.
+ *
+ * This is an implementation of a rotating hash algorithm with a
+ * slight modification so that it can be used with a 32-bit bitmask
+ * and a power-of-two table size (so as to avoid the expensive
+ * "hash % prime" step).
+ *
+ * This algorithm uses 8n+6 instructions to hash a key of n bytes.
+ * The mix step is a left bitwise rotation by 4, so systems with a
+ * rotate instruction will save 2 instructions per loop.
+ *
+ * @param key The arbitrary-length key.
+ * @param len The length of the key, in bytes.
+ * @param seed An arbitrary seed value.
+ * @return A 32-bit hash value.
+ *
+ */
+spif_uint32_t
+spifhash_rotating(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed)
+{
+ spif_uint32_t hash, i;
+
+ if (!seed) {
+ seed = BUILTIN_RANDOM_SEED;
+ }
+ for (hash = seed, i = 0; i < len; i++) {
+ hash = (hash << 4) ^ (hash >> 28) ^ key[i];
+ }
+ return (hash ^ (hash >> 10) ^ (hash >> 20));
+}
+
+/**
+ * The one-at-a-time hash.
+ *
+ * This is an implementation of the one-at-a-time hash. It is similar
+ * to spifhash_rotating(). It uses 9n+9 instructions to hash a key of n
+ * bytes. A full 32-bit key is produced. No funneling weaknesses are
+ * known.
+ *
+ * @param key The arbitrary-length key.
+ * @param len The length of the key, in bytes.
+ * @param seed An arbitrary seed value.
+ * @return A 32-bit hash value.
+ *
+ */
+spif_uint32_t
+spifhash_one_at_a_time(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed)
+{
+ spif_uint32_t hash, i;
+
+ if (!seed) {
+ seed = BUILTIN_RANDOM_SEED;
+ }
+ for (hash = seed, i = 0; i < len; i++) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash;
+}
+
+/**
+ * The FNV hash.
+ *
+ * This is the Fowler/Noll/Vo (FNV) hash. This code is a modified
+ * version of that which appears at
+ * http://www.isthe.com/chongo/tech/comp/fnv/
+ *
+ * @param key The arbitrary-length key.
+ * @param len The length of the key, in bytes.
+ * @param seed An arbitrary seed value.
+ * @return A 32-bit hash value.
+ *
+ */
+spif_uint32_t
+spifhash_fnv(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed)
+{
+ spif_uint8_t *key_end = key + len;
+ spif_uint32_t hash;
+
+ if (!seed) {
+ seed = SPIF_CAST(uint32) 0x811c9dc5; /* FNV-1a 32-bit init. */
+ }
+ for (hash = seed; key < key_end; key++) {
+ hash ^= SPIF_CAST(uint32) (*key);
+
+#ifdef __GNUC__
+ hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
+#else
+ hash *= SPIF_CAST(uint32) 0x01000193; /* 32-bit FNV-1 prime. */
+#endif
+ }
+
+ return hash;
+}