Thu Jan 22 20:42:40 2004 Michael Jennings (mej)

Added a few new hashes. Fixed a typo in the old hashes. LibAST-ized the hash tests and made new performance tests. Anybody care to comment on the validity of my performance tests? All 6 algorithms end up with pretty much the same results. Do I need to use a dictionary instead of random "words?" That's kinda what I'm thinking.... SVN revision: 8623
author: Michael Jennings <mej@kainx.org> 2004-01-23 01:44:51 +0000
committer: Michael Jennings <mej@kainx.org> 2004-01-23 01:44:51 +0000
commit: d8a373e85af372267334ba1b4b42cb9046e376ae (patch)
tree: 336d9698d1d3ade4c412886e20785fc056aadd6b /src
parent: ecb629cbc11f5d2c326f95b85d041e7521a41cfb (diff)
download: libast-d8a373e85af372267334ba1b4b42cb9046e376ae.tar.gz
1 files changed, 154 insertions, 39 deletions
diff --git a/src/builtin_hashes.c b/src/builtin_hashes.c
index ef07ea7..eeb44a1 100644
--- a/src/builtin_hashes.c
+++ b/src/builtin_hashes.c
@@ -29,6 +29,8 @@ static const char cvs_ident[] = "$Id$";
 
 #include <libast_internal.h>
 
+#define BUILTIN_RANDOM_SEED   (SPIF_CAST(uint32) 0xf721b64d)
+
 /*
  * Bob Jenkins' hash algorithm as published in December 1996.  Public
  * domain.  See http://burtleburtle.net/bob/hash/
@@ -43,8 +45,8 @@ static const char cvs_ident[] = "$Id$";
  * instructions for an n-byte key.
  *
  * For a hash value of w bits, the returned value should be
- * bitwise-AND'd with JENKINS_HASH_MASK(w), and the hash table should
- * have JENKINS_HASH_SIZE(w) buckets.
+ * bitwise-AND'd with SPIFHASH_MASK(w), and the hash table should
+ * have SPIFHASH_SIZE(w) buckets.
  *
  * @param key    Pointer to bitstream holding key.
  * @param length Number of bytes in bitstream.
@@ -54,12 +56,12 @@ static const char cvs_ident[] = "$Id$";
  *
  */
 spif_uint32_t
-jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
+spifhash_jenkins(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
 {
     register spif_uint32_t a, b, c, len;
 
     len = length;
-    a = b = 0xf721b64d;  /* This can be any 32-bit value. */
+    a = b = BUILTIN_RANDOM_SEED;  /* This can be any 32-bit value. */
     c = seed;
 
     /* The loop below handles most of the key (all but the last
@@ -68,7 +70,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
         a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24));
         b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24));
         c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24));
-        JENKINS_MIX(a, b, c);
+        SPIFHASH_JENKINS_MIX(a, b, c);
         key += 12;
         len -= 12;
     }
@@ -90,7 +92,7 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
         case 1:   a += key[0];
         /* case 0: nothing left to add */
     }
-    JENKINS_MIX(a, b, c);
+    SPIFHASH_JENKINS_MIX(a, b, c);
 
     return c;
 }
@@ -100,10 +102,10 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
  *
  * This function hashes a series of 32-bit unsigned integers of a
  * given length into a 32-bit hash value suitable for use in hash
- * tables.  This hash is basically identical to jenkins_hash(), except
+ * tables.  This hash is basically identical to spifhash_jenkins(), except
  * that the key length must be a whole number of 32-bit dword's, and
  * the length is given in spif_uint32_t's, not bytes.  It is much
- * faster than jenkins_hash(), so if padding keys to 32 bit chunks is
+ * faster than spifhash_jenkins(), so if padding keys to 32 bit chunks is
  * inexpensive, this function is probably preferable.
  *
  * @param key    Pointer to bitstream holding key.
@@ -114,22 +116,23 @@ jenkins_hash(register spif_uint8_t *key, register spif_uint32_t length, register
  *
  */
 spif_uint32_t
-jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, register spif_uint32_t seed)
+spifhash_jenkins32(spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
 {
     register spif_uint32_t a, b, c, len;
+    register spif_uint32_t *key_dword = SPIF_CAST_PTR(uint32) key;
 
     len = length;
-    a = b = 0xf721b64d;  /* This can be any 32-bit value. */
+    a = b = BUILTIN_RANDOM_SEED;  /* This can be any 32-bit value. */
     c = seed;
 
     /* The loop below handles most of the key (all but the last
        length % 3 uint32's). */
     while (len >= 3) {
-        a += key[0];
-        b += key[1];
-        c += key[2];
-        JENKINS_MIX(a, b, c);
-        key += 3;
+        a += key_dword[0];
+        b += key_dword[1];
+        c += key_dword[2];
+        SPIFHASH_JENKINS_MIX(a, b, c);
+        key_dword += 3;
         len -= 3;
     }
 
@@ -137,11 +140,11 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis
        uint32's.  All cases drop through to the next case. */
     c += length;
     switch (len) {
-        case 2:   b += key[1];
-        case 1:   a += key[0];
+        case 2:  b += key_dword[1];
+        case 1:  a += key_dword[0];
         /* case 0: nothing left to add */
     }
-    JENKINS_MIX(a, b, c);
+    SPIFHASH_JENKINS_MIX(a, b, c);
 
     return c;
 }
@@ -152,9 +155,9 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis
  *
  * This function hashes a bitstream of a given length into a 32-bit
  * hash value suitable for use in hash tables.  This hash is basically
- * identical to jenkins_hash(), except that it only works on
+ * identical to spifhash_jenkins(), except that it only works on
  * little-endian machines (e.g., Intel x86 and VAX).  It is faster
- * than jenkins_hash(), so it should be preferred on little-endian
+ * than spifhash_jenkins(), so it should be preferred on little-endian
  * systems.
  *
  * @param key    Pointer to bitstream holding key.
@@ -165,12 +168,12 @@ jenkins_hash32(register spif_uint32_t *key, register spif_uint32_t length, regis
  *
  */
 spif_uint32_t
-jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
+spifhash_jenkinsLE(register spif_uint8_t *key, register spif_uint32_t length, register spif_uint32_t seed)
 {
     register spif_uint32_t a, b, c, len;
 
     len = length;
-    a = b = 0xf721b64d;  /* This can be any 32-bit value. */
+    a = b = BUILTIN_RANDOM_SEED;  /* This can be any 32-bit value. */
     c = seed;
 
     /* The loop below handles most of the key (all but the last
@@ -181,7 +184,7 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist
             a += (key[0] + (SPIF_CAST(uint32) key[1] << 8) + (SPIF_CAST(uint32) key[2] << 16) + (SPIF_CAST(uint32) key[3] << 24));
             b += (key[4] + (SPIF_CAST(uint32) key[5] << 8) + (SPIF_CAST(uint32) key[6] << 16) + (SPIF_CAST(uint32) key[7] << 24));
             c += (key[8] + (SPIF_CAST(uint32) key[9] << 8) + (SPIF_CAST(uint32) key[10] << 16) + (SPIF_CAST(uint32) key[11] << 24));
-            JENKINS_MIX(a, b, c);
+            SPIFHASH_JENKINS_MIX(a, b, c);
             key += 12;
             len -= 12;
         }
@@ -189,11 +192,11 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist
         /* 32-bit aligned.  Use speedier method. */
         while (len >= 12) {
             /* These three lines are the only ones which differ from
-               jenkins_hash(). */
-            a += *key;
+               spifhash_jenkins(). */
+            a += *(SPIF_CAST_PTR(uint32) key);
             b += *(SPIF_CAST_PTR(uint32) (key + 4));
             c += *(SPIF_CAST_PTR(uint32) (key + 8));
-            JENKINS_MIX(a, b, c);
+            SPIFHASH_JENKINS_MIX(a, b, c);
             key += 12;
             len -= 12;
         }
@@ -203,21 +206,133 @@ jenkins_hashLE(register spif_uint8_t *key, register spif_uint32_t length, regist
        bytes.  All cases drop through to the next case. */
     c += length;
     switch (len) {
-        case 11:  c += (SPIF_CAST(uint32) key[10] << 24);
-        case 10:  c += (SPIF_CAST(uint32) key[9] << 16);
-        case 9:   c += (SPIF_CAST(uint32) key[8] << 8);
-        case 8:   b += (SPIF_CAST(uint32) key[7] << 24);
-        case 7:   b += (SPIF_CAST(uint32) key[6] << 16);
-        case 6:   b += (SPIF_CAST(uint32) key[5] << 8);
-        case 5:   b += key[4];
-        case 4:   a += (SPIF_CAST(uint32) key[3] << 24);
-        case 3:   a += (SPIF_CAST(uint32) key[2] << 16);
-        case 2:   a += (SPIF_CAST(uint32) key[1] << 8);
-        case 1:   a += key[0];
-        /* case 0: nothing left to add */
+      case 11:
+          c += (SPIF_CAST(uint32) key[10] << 24);
+      case 10:
+          c += (SPIF_CAST(uint32) key[9] << 16);
+      case 9:
+          c += (SPIF_CAST(uint32) key[8] << 8);
+      case 8:
+          b += (SPIF_CAST(uint32) key[7] << 24);
+      case 7:
+          b += (SPIF_CAST(uint32) key[6] << 16);
+      case 6:
+          b += (SPIF_CAST(uint32) key[5] << 8);
+      case 5:
+          b += key[4];
+      case 4:
+          a += (SPIF_CAST(uint32) key[3] << 24);
+      case 3:
+          a += (SPIF_CAST(uint32) key[2] << 16);
+      case 2:
+          a += (SPIF_CAST(uint32) key[1] << 8);
+      case 1:
+          a += key[0];
+          /* case 0: nothing left to add */
     }
-    JENKINS_MIX(a, b, c);
+    SPIFHASH_JENKINS_MIX(a, b, c);
 
     return c;
 }
 #endif
+
+
+/**
+ * The rotating hash.
+ *
+ * This is an implementation of a rotating hash algorithm with a
+ * slight modification so that it can be used with a 32-bit bitmask
+ * and a power-of-two table size (so as to avoid the expensive
+ * "hash % prime" step).
+ *
+ * This algorithm uses 8n+6 instructions to hash a key of n bytes.
+ * The mix step is a left bitwise rotation by 4, so systems with a
+ * rotate instruction will save 2 instructions per loop.
+ *
+ * @param key  The arbitrary-length key.
+ * @param len  The length of the key, in bytes.
+ * @param seed An arbitrary seed value.
+ * @return     A 32-bit hash value.
+ *
+ */
+spif_uint32_t
+spifhash_rotating(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed)
+{
+    spif_uint32_t hash, i;
+
+    if (!seed) {
+        seed = BUILTIN_RANDOM_SEED;
+    }
+    for (hash = seed, i = 0; i < len; i++) {
+        hash = (hash << 4) ^ (hash >> 28) ^ key[i];
+    }
+    return (hash ^ (hash >> 10) ^ (hash >> 20));
+}
+
+/**
+ * The one-at-a-time hash.
+ *
+ * This is an implementation of the one-at-a-time hash.  It is similar
+ * to spifhash_rotating().  It uses 9n+9 instructions to hash a key of n
+ * bytes.  A full 32-bit key is produced.  No funneling weaknesses are
+ * known.
+ *
+ * @param key  The arbitrary-length key.
+ * @param len  The length of the key, in bytes.
+ * @param seed An arbitrary seed value.
+ * @return     A 32-bit hash value.
+ *
+ */
+spif_uint32_t
+spifhash_one_at_a_time(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed)
+{
+    spif_uint32_t hash, i;
+
+    if (!seed) {
+        seed = BUILTIN_RANDOM_SEED;
+    }
+    for (hash = seed, i = 0; i < len; i++) {
+        hash += key[i];
+        hash += (hash << 10);
+        hash ^= (hash >> 6);
+    }
+    hash += (hash << 3);
+    hash ^= (hash >> 11);
+    hash += (hash << 15);
+    return hash;
+}
+
+/**
+ * The FNV hash.
+ *
+ * This is the Fowler/Noll/Vo (FNV) hash.  This code is a modified
+ * version of that which appears at
+ * http://www.isthe.com/chongo/tech/comp/fnv/
+ *
+ * @param key  The arbitrary-length key.
+ * @param len  The length of the key, in bytes.
+ * @param seed An arbitrary seed value.
+ * @return     A 32-bit hash value.
+ *
+ */
+spif_uint32_t
+spifhash_fnv(spif_uint8_t *key, spif_uint32_t len, spif_uint32_t seed)
+{
+    spif_uint8_t *key_end = key + len;
+    spif_uint32_t hash;
+
+    if (!seed) {
+        seed = SPIF_CAST(uint32) 0x811c9dc5;    /* FNV-1a 32-bit init. */
+    }
+    for (hash = seed; key < key_end; key++) {
+        hash ^= SPIF_CAST(uint32) (*key);
+
+#ifdef __GNUC__
+        hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
+#else
+        hash *= SPIF_CAST(uint32) 0x01000193;   /* 32-bit FNV-1 prime. */
+#endif
+    }
+
+    return hash;
+}
author	Michael Jennings <mej@kainx.org>	2004-01-23 01:44:51 +0000
committer	Michael Jennings <mej@kainx.org>	2004-01-23 01:44:51 +0000
commit	d8a373e85af372267334ba1b4b42cb9046e376ae (patch)
tree	336d9698d1d3ade4c412886e20785fc056aadd6b /src
parent	ecb629cbc11f5d2c326f95b85d041e7521a41cfb (diff)
download	libast-d8a373e85af372267334ba1b4b42cb9046e376ae.tar.gz