Hash Function Change - Murmur hash and true per process hash seed

This patch does the following: *) Introduces multiple new hash functions to choose from at build time. This includes Murmur-32, SDBM, DJB2, SipHash, SuperFast, and One-at-a-time. Currently this is handled by muning hv.h. Configure support hopefully to follow. *) Changes the default hash to Murmur hash which is faster than the old default One-at-a-time. *) Rips out the old HvREHASH mechanism and replaces it with a per-process random hash seed. *) Changes the old PL_hash_seed from an interpreter value to a global variable. This means it does not have to be copied during interpreter setup or cloning. *) Changes the format of the PERL_HASH_SEED variable to a hex string so that hash seeds longer than fit in an integer are possible. *) Changes the return of Hash::Util::hash_seed() from a number to a string. This is to accomodate hash functions which have more bits than can be fit in an integer. *) Adds new functions to Hash::Util to improve introspection of hashes -) hash_value() - returns an integer hash value for a given string. -) bucket_info() - returns basic hash bucket utilization info -) bucket_stats() - returns more hash bucket utilization info -) bucket_array() - which keys are in which buckets in a hash More details on the new hash functions can be found below: Murmur Hash: (v3) from google, see http://code.google.com/p/smhasher/wiki/MurmurHash3 Superfast Hash: From Paul Hsieh. http://www.azillionmonkeys.com/qed/hash.html DJB2: a hash function from Daniel Bernstein http://www.cse.yorku.ca/~oz/hash.html SDBM: a hash function sdbm. http://www.cse.yorku.ca/~oz/hash.html SipHash: by Jean-Philippe Aumasson and Daniel J. Bernstein. https://www.131002.net/siphash/ They have all be converted into Perl's ugly macro format. I have not done any rigorous testing to make sure this conversion is correct. They seem to function as expected however. All of them use the random hash seed. You can force the use of a given function by defining one of PERL_HASH_FUNC_MURMUR PERL_HASH_FUNC_SUPERFAST PERL_HASH_FUNC_DJB2 PERL_HASH_FUNC_SDBM PERL_HASH_FUNC_ONE_AT_A_TIME Setting the environment variable PERL_HASH_SEED_DEBUG to 1 will make perl output the current seed (changed to hex) and the hash function it has been built with. Setting the environment variable PERL_HASH_SEED to a hex value will cause that value to be used at the seed. Any missing bits of the seed will be set to 0. The bits are filled in from left to right, not the traditional right to left so setting it to FE results in a seed value of "FE000000" not "000000FE". Note that we do the hash seed initialization in perl_construct(). Doing it via perl_alloc() (via init_tls) causes problems under threaded builds as the buffers used for reentrant srand48 functions are not allocated. See also the p5p mail "Hash improvements blocker: portable random code that doesnt depend on a functional interpreter", Message-ID: <CANgJU+X+wNayjsNOpKRqYHnEy_+B9UH_2irRA5O3ZmcYGAAZFQ@mail.gmail.com>
author: Yves Orton <demerphq@gmail.com> 2012-11-17 14:12:04 +0100
committer: Yves Orton <demerphq@gmail.com> 2012-11-17 19:19:43 +0100
commit: 7dc8663964c66a698d31bbdc8e8abed69bddeec3 (patch)
tree: 92f39aa5770c2f401bbe134db921d68cbe4f758a /hv.h
parent: 867b16b50f413adfdb2addcab33ad1d6e61da9d5 (diff)
download: perl-7dc8663964c66a698d31bbdc8e8abed69bddeec3.tar.gz
1 files changed, 499 insertions, 37 deletions
diff --git a/hv.h b/hv.h
index 1e32ab9b42..6983a8065e 100644
--- a/hv.h
+++ b/hv.h
@@ -82,6 +82,7 @@ struct xpvhv_aux {
     AV		*xhv_backreferences; /* back references for weak references */
     HE		*xhv_eiter;	/* current entry of iterator */
     I32		xhv_riter;	/* current root of iterator */
+
 /* Concerning xhv_name_count: When non-zero, xhv_name_u contains a pointer 
  * to an array of HEK pointers, this being the length. The first element is
  * the name of the stash, which may be NULL. If xhv_name_count is positive,
@@ -103,9 +104,6 @@ struct xpvhv {
 };
 
 /* hash a key */
-/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins
- * from requirements by Colin Plumb.
- * (http://burtleburtle.net/bob/hash/doobs.html) */
 /* The use of a temporary pointer and the casting games
  * is needed to serve the dual purposes of
  * (a) the hashed data being interpreted as "unsigned char" (new since 5.8,
@@ -118,35 +116,513 @@ struct xpvhv {
  * If USE_HASH_SEED is defined, hash randomisation is done by default
  * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
  * only if the environment variable PERL_HASH_SEED is set.
- * For maximal control, one can define PERL_HASH_SEED.
- * (see also perl.c:perl_parse()).
+ * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
  */
 #ifndef PERL_HASH_SEED
 #   if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
-#       define PERL_HASH_SEED	PL_hash_seed
+#       define PERL_HASH_SEED PL_hash_seed
 #   else
-#       define PERL_HASH_SEED	0
+#       define PERL_HASH_SEED "PeRlHaShhAcKpErl"
 #   endif
 #endif
 
-#define PERL_HASH(hash,str,len) PERL_HASH_INTERNAL_(hash,str,len,0)
+#define PERL_HASH_SEED_U32   *((U32*)PERL_HASH_SEED)
+#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0])
+#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1])
 
-/* Only hv.c and mod_perl should be doing this.  */
+/* legacy - only mod_perl should be doing this.  */
 #ifdef PERL_HASH_INTERNAL_ACCESS
-#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH_INTERNAL_(hash,str,len,1)
+#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
+#endif
+
+/* Uncomment one of the following lines to use an alternative hash algorithm.
+#define PERL_HASH_FUNC_SDBM
+#define PERL_HASH_FUNC_DJB2
+#define PERL_HASH_FUNC_SUPERFAST
+#define PERL_HASH_FUNC_MURMUR3
+#define PERL_HASH_FUNC_SIPHASH
+#define PERL_HASH_FUNC_ONE_AT_A_TIME
+*/
+
+#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME))
+#define PERL_HASH_FUNC_MURMUR3
+#endif
+
+#if defined(PERL_HASH_FUNC_SIPHASH)
+#define PERL_HASH_FUNC "SIPHASH"
+#define PERL_HASH_SEED_BYTES 16
+
+/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
+ * The authors claim it is relatively secure compared to the alternatives
+ * and that performance wise it is a suitable hash for languages like Perl.
+ * See:
+ *
+ * https://www.131002.net/siphash/
+ *
+ * This implementation seems to perform slightly slower than one-at-a-time for
+ * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
+ * regardless of keys size.
+ *
+ * It is 64 bit only.
+ */
+
+#define PERL_HASH_NEEDS_TWO_SEEDS
+
+#ifndef U64
+#define U64 uint64_t
+#endif
+
+#define ROTL(x,b) (U64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define U32TO8_LE(p, v)         \
+    (p)[0] = (U8)((v)      ); (p)[1] = (U8)((v) >>  8); \
+    (p)[2] = (U8)((v) >> 16); (p)[3] = (U8)((v) >> 24);
+
+#define U64TO8_LE(p, v)         \
+  U32TO8_LE((p),     (U32)((v)      ));   \
+  U32TO8_LE((p) + 4, (U32)((v) >> 32));
+
+#define U8TO64_LE(p) \
+  (((U64)((p)[0])      ) | \
+   ((U64)((p)[1]) <<  8) | \
+   ((U64)((p)[2]) << 16) | \
+   ((U64)((p)[3]) << 24) | \
+   ((U64)((p)[4]) << 32) | \
+   ((U64)((p)[5]) << 40) | \
+   ((U64)((p)[6]) << 48) | \
+   ((U64)((p)[7]) << 56))
+
+#define SIPROUND            \
+  do {              \
+    v0_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,13); v1_PeRlHaSh ^= v0_PeRlHaSh; v0_PeRlHaSh=ROTL(v0_PeRlHaSh,32); \
+    v2_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,16); v3_PeRlHaSh ^= v2_PeRlHaSh;     \
+    v0_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,21); v3_PeRlHaSh ^= v0_PeRlHaSh;     \
+    v2_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,17); v1_PeRlHaSh ^= v2_PeRlHaSh; v2_PeRlHaSh=ROTL(v2_PeRlHaSh,32); \
+  } while(0)
+
+/* SipHash-2-4 */
+#define PERL_HASH(hash,str,len) STMT_START { \
+  const char * const strtmp_PeRlHaSh = (str); \
+  const unsigned char *in_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
+  const U32 inlen_PeRlHaSh = (len); \
+  /* "somepseudorandomlygeneratedbytes" */ \
+  U64 v0_PeRlHaSh = 0x736f6d6570736575ULL; \
+  U64 v1_PeRlHaSh = 0x646f72616e646f6dULL; \
+  U64 v2_PeRlHaSh = 0x6c7967656e657261ULL; \
+  U64 v3_PeRlHaSh = 0x7465646279746573ULL; \
+\
+  U64 b_PeRlHaSh;                           \
+  U64 k0_PeRlHaSh = PERL_HASH_SEED_U64_1;   \
+  U64 k1_PeRlHaSh = PERL_HASH_SEED_U64_2;   \
+  U64 m_PeRlHaSh;                           \
+  const int left_PeRlHaSh = inlen_PeRlHaSh & 7; \
+  const U8 *end_PeRlHaSh = in_PeRlHaSh + inlen_PeRlHaSh - left_PeRlHaSh; \
+\
+  b_PeRlHaSh = ( ( U64 )(len) ) << 56; \
+  v3_PeRlHaSh ^= k1_PeRlHaSh; \
+  v2_PeRlHaSh ^= k0_PeRlHaSh; \
+  v1_PeRlHaSh ^= k1_PeRlHaSh; \
+  v0_PeRlHaSh ^= k0_PeRlHaSh; \
+\
+  for ( ; in_PeRlHaSh != end_PeRlHaSh; in_PeRlHaSh += 8 ) \
+  { \
+    m_PeRlHaSh = U8TO64_LE( in_PeRlHaSh ); \
+    v3_PeRlHaSh ^= m_PeRlHaSh; \
+    SIPROUND; \
+    SIPROUND; \
+    v0_PeRlHaSh ^= m_PeRlHaSh; \
+  } \
+\
+  switch( left_PeRlHaSh ) \
+  { \
+  case 7: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 6] )  << 48; \
+  case 6: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 5] )  << 40; \
+  case 5: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 4] )  << 32; \
+  case 4: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 3] )  << 24; \
+  case 3: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 2] )  << 16; \
+  case 2: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 1] )  <<  8; \
+  case 1: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 0] ); break; \
+  case 0: break; \
+  } \
+\
+  v3_PeRlHaSh ^= b_PeRlHaSh; \
+  SIPROUND; \
+  SIPROUND; \
+  v0_PeRlHaSh ^= b_PeRlHaSh; \
+\
+  v2_PeRlHaSh ^= 0xff; \
+  SIPROUND; \
+  SIPROUND; \
+  SIPROUND; \
+  SIPROUND; \
+  b_PeRlHaSh = v0_PeRlHaSh ^ v1_PeRlHaSh ^ v2_PeRlHaSh  ^ v3_PeRlHaSh; \
+  (hash)= (U32)(b_PeRlHaSh & U32_MAX); \
+} STMT_END
+
+#elif defined(PERL_HASH_FUNC_SUPERFAST)
+#define PERL_HASH_FUNC "SUPERFAST"
+/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
+ * (http://burtleburtle.net/bob/hash/doobs.html)
+ * It is by Paul Hsieh (c) 2004 and is analysed here
+ * http://www.azillionmonkeys.com/qed/hash.html
+ * license terms are here:
+ * http://www.azillionmonkeys.com/qed/weblicense.html
+ */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const U16 *) (d)))
 #endif
 
-/* Common base for PERL_HASH and PERL_HASH_INTERNAL that parameterises
- * the source of the seed. Not for direct use outside of hv.c. */
+#if !defined (get16bits)
+#define get16bits(d) ((((const U8 *)(d))[1] << UINT32_C(8))\
+                      +((const U8 *)(d))[0])
+#endif
+#define PERL_HASH(hash,str,len) \
+      STMT_START        { \
+        register const char * const strtmp_PeRlHaSh = (str); \
+        register const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
+        register U32 len_PeRlHaSh = (len); \
+        register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
+        register U32 tmp_PeRlHaSh; \
+        register int rem_PeRlHaSh= len_PeRlHaSh & 3; \
+        len_PeRlHaSh >>= 2; \
+                            \
+        for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \
+            hash_PeRlHaSh  += get16bits (str_PeRlHaSh); \
+            tmp_PeRlHaSh    = (get16bits (str_PeRlHaSh+2) << 11) ^ hash_PeRlHaSh; \
+            hash_PeRlHaSh   = (hash_PeRlHaSh << 16) ^ tmp_PeRlHaSh; \
+            str_PeRlHaSh   += 2 * sizeof (U16); \
+            hash_PeRlHaSh  += hash_PeRlHaSh >> 11; \
+        } \
+        \
+        /* Handle end cases */ \
+        switch (rem_PeRlHaSh) { \
+            case 3: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
+                    hash_PeRlHaSh ^= hash_PeRlHaSh << 16; \
+                    hash_PeRlHaSh ^= str_PeRlHaSh[sizeof (U16)] << 18; \
+                    hash_PeRlHaSh += hash_PeRlHaSh >> 11; \
+                    break; \
+            case 2: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
+                    hash_PeRlHaSh ^= hash_PeRlHaSh << 11; \
+                    hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
+                    break; \
+            case 1: hash_PeRlHaSh += *str_PeRlHaSh; \
+                    hash_PeRlHaSh ^= hash_PeRlHaSh << 10; \
+                    hash_PeRlHaSh += hash_PeRlHaSh >> 1; \
+        } \
+        \
+        /* Force "avalanching" of final 127 bits */ \
+        hash_PeRlHaSh ^= hash_PeRlHaSh << 3; \
+        hash_PeRlHaSh += hash_PeRlHaSh >> 5; \
+        hash_PeRlHaSh ^= hash_PeRlHaSh << 4; \
+        hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
+        hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \
+        (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \
+    } STMT_END
+
+#elif defined(PERL_HASH_FUNC_MURMUR3)
+#define PERL_HASH_FUNC "MURMUR3"
+#define PERL_HASH_SEED_BYTES 4
+
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was originally written by Shane Day, and is also public domain,
+ * and was modified to function as a macro similar to other perl hash functions by
+ * Yves Orton.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ *
+ * If you want to understand the MurmurHash algorithm you would be much better
+ * off reading the original source. Just point your browser at:
+ * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+ *
+ * How does it work?
+ *
+ * We can only process entire 32 bit chunks of input, except for the very end
+ * that may be shorter.
+ *
+ * To handle endianess I simply use a macro that reads a U32 and define
+ * that macro to be a direct read on little endian machines, a read and swap
+ * on big endian machines, or a byte-by-byte read if the endianess is unknown.
+ */
+
+
+/*-----------------------------------------------------------------------------
+ * Endianess, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * MURMUR_READ_UINT32(x)   Read a little endian unsigned 32-bit int
+ * MURMUR_UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
+ * MURMUR_ROTL32(x,r)      Rotate x left by r bits
+ */
 
-#define PERL_HASH_INTERNAL_(hash,str,len,internal) \
+/* Convention is to define __BYTE_ORDER == to one of these values */
+#if !defined(__BIG_ENDIAN)
+  #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+  #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386 */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+  #define __BYTE_ORDER __LITTLE_ENDIAN
+  #define MURMUR_UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */
+#if !defined(__BYTE_ORDER)
+  #if defined(__ARMEL__) || defined(__MIPSEL__)
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #endif
+  #if defined(__ARMEB__) || defined(__MIPSEB__)
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* Now find best way we can to READ_UINT32 */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+  #define MURMUR_READ_UINT32(ptr)   (*((U32*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+    #define MURMUR_READ_UINT32(ptr)   (__builtin_bswap32(*((U32*)(ptr))))
+  #else
+    /* Without a known fast bswap32 we're just as well off doing this */
+    #define MURMUR_READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+    #define MURMUR_UNALIGNED_SAFE
+  #endif
+#else
+  /* Unknown endianess so last resort is to read individual bytes */
+  #define MURMUR_READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+
+  /* Since we're not doing word-reads we can skip the messing about with realignment */
+  #define MURMUR_UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
+  #define MURMUR_ROTL32(x,r)  _rotl(x,r)
+#else
+  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+  #define MURMUR_ROTL32(x,r)  (((U32)x << r) | ((U32)x >> (32 - r)))
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define MURMUR_C1  (0xcc9e2d51)
+#define MURMUR_C2  (0x1b873593)
+#define MURMUR_C3  (0xe6546b64)
+#define MURMUR_C4  (0x85ebca6b)
+#define MURMUR_C5  (0xc2b2ae35)
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 32-bits of input. */
+#define MURMUR_DOBLOCK(h1, k1) STMT_START { \
+    k1 *= MURMUR_C1; \
+    k1 = MURMUR_ROTL32(k1,15); \
+    k1 *= MURMUR_C2; \
+    \
+    h1 ^= k1; \
+    h1 = MURMUR_ROTL32(h1,13); \
+    h1 = h1 * 5 + MURMUR_C3; \
+} STMT_END
+
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
+/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
+#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \
+    int MURMUR_DOBYTES_i = cnt; \
+    while(MURMUR_DOBYTES_i--) { \
+        c = c>>8 | *ptr++<<24; \
+        n++; len--; \
+        if(n==4) { \
+            MURMUR_DOBLOCK(h1, c); \
+            n = 0; \
+        } \
+    } \
+} STMT_END
+
+/* process the last 1..3 bytes and finalize */
+#define MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length) STMT_START { \
+    /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */\
+    PeRlHaSh_len -= PeRlHaSh_len/4*4;                           \
+                                                                \
+    /* Append any remaining bytes into carry */                 \
+    MURMUR_DOBYTES(PeRlHaSh_len, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len); \
+                                                                \
+    if (PeRlHaSh_bytes_in_carry) {                                           \
+        PeRlHaSh_k1 = PeRlHaSh_carry >> ( 4 - PeRlHaSh_bytes_in_carry ) * 8; \
+        PeRlHaSh_k1 *= MURMUR_C1;                               \
+        PeRlHaSh_k1 = MURMUR_ROTL32(PeRlHaSh_k1,15);                   \
+        PeRlHaSh_k1 *= MURMUR_C2;                               \
+        PeRlHaSh_h1 ^= PeRlHaSh_k1;                             \
+    }                                                           \
+    PeRlHaSh_h1 ^= PeRlHaSh_total_length;                       \
+                                                                \
+    /* fmix */                                                  \
+    PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16;                           \
+    PeRlHaSh_h1 *= MURMUR_C4;                                   \
+    PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 13;                           \
+    PeRlHaSh_h1 *= MURMUR_C5;                                   \
+    PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16;                           \
+    (hash)= PeRlHaSh_h1;                                        \
+} STMT_END
+
+/* now we create the hash function */
+
+#if defined(UNALIGNED_SAFE)
+#define PERL_HASH(hash,str,len) STMT_START { \
+        register const char * const s_PeRlHaSh_tmp = (str); \
+        register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \
+        register I32 PeRlHaSh_len = len;    \
+                                            \
+        U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32;   \
+        U32 PeRlHaSh_k1;                    \
+        U32 PeRlHaSh_carry = 0;             \
+                                            \
+        const unsigned char *PeRlHaSh_end;  \
+                                            \
+        int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \
+        I32 PeRlHaSh_total_length= PeRlHaSh_len; \
+                                            \
+        /* This CPU handles unaligned word access */            \
+        /* Process 32-bit chunks */                             \
+        PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4;         \
+        for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+            PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr);        \
+            MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1);                  \
+        }                                                       \
+        \
+        MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\
+    } STMT_END
+#else
+#define PERL_HASH(hash,str,len) STMT_START { \
+        register const char * const s_PeRlHaSh_tmp = (str); \
+        register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \
+        register I32 PeRlHaSh_len = len;    \
+                                            \
+        U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32;   \
+        U32 PeRlHaSh_k1;                    \
+        U32 PeRlHaSh_carry = 0;             \
+                                            \
+        const unsigned char *PeRlHaSh_end;  \
+                                            \
+        int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \
+        I32 PeRlHaSh_total_length= PeRlHaSh_len; \
+                                            \
+        /* This CPU does not handle unaligned word access */    \
+                                                                \
+        /* Consume enough so that the next data byte is word aligned */ \
+        int PeRlHaSh_i = -(long)PeRlHaSh_ptr & 3;                       \
+        if(PeRlHaSh_i && PeRlHaSh_i <= PeRlHaSh_len) {                  \
+          MURMUR_DOBYTES(PeRlHaSh_i, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len);\
+        }                                                               \
+        \
+        /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ \
+        PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4;                 \
+        switch(PeRlHaSh_bytes_in_carry) { /* how many bytes in carry */                  \
+            case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */ \
+            for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+                PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr);        \
+                MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1);                  \
+            }                                                       \
+            break;                                                  \
+            case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */   \
+            for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+                PeRlHaSh_k1 = PeRlHaSh_carry>>24;                   \
+                PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr);             \
+                PeRlHaSh_k1 |= PeRlHaSh_carry<<8;                       \
+                MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1);                  \
+            }                                                       \
+            break;                                                  \
+            case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */   \
+            for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+                PeRlHaSh_k1 = PeRlHaSh_carry>>16;                   \
+                PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr);             \
+                PeRlHaSh_k1 |= PeRlHaSh_carry<<16;                      \
+                MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1);                  \
+            }                                                       \
+            break;                                                  \
+            case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */   \
+            for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
+                PeRlHaSh_k1 = PeRlHaSh_carry>>8;                    \
+                PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr);             \
+                PeRlHaSh_k1 |= PeRlHaSh_carry<<24;                      \
+                MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1);                  \
+            }                                                       \
+        }                                                           \
+                                                                    \
+        MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\
+    } STMT_END
+#endif
+
+#elif defined(PERL_HASH_FUNC_DJB2)
+#define PERL_HASH_FUNC "DJB2"
+#define PERL_HASH_SEED_BYTES 4
+#define PERL_HASH(hash,str,len) \
+     STMT_START        { \
+        register const char * const s_PeRlHaSh_tmp = (str); \
+        register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+        register I32 i_PeRlHaSh = len; \
+        register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
+        while (i_PeRlHaSh--) { \
+            hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \
+        } \
+        (hash) = hash_PeRlHaSh;\
+    } STMT_END
+
+#elif defined(PERL_HASH_FUNC_SDBM)
+#define PERL_HASH_FUNC "SDBM"
+#define PERL_HASH_SEED_BYTES 4
+#define PERL_HASH(hash,str,len) \
+     STMT_START        { \
+        register const char * const s_PeRlHaSh_tmp = (str); \
+        register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+        register I32 i_PeRlHaSh = len; \
+        register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
+        while (i_PeRlHaSh--) { \
+            hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \
+        } \
+        (hash) = hash_PeRlHaSh;\
+    } STMT_END
+
+#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME)
+/* DEFAULT/HISTORIC HASH FUNCTION */
+#define PERL_HASH_FUNC "ONE_AT_A_TIME"
+#define PERL_HASH_SEED_BYTES 4
+
+/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins
+ * from requirements by Colin Plumb.
+ * (http://burtleburtle.net/bob/hash/doobs.html) */
+#define PERL_HASH(hash,str,len) \
      STMT_START	{ \
-	const char * const s_PeRlHaSh_tmp = str; \
-	const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
-	I32 i_PeRlHaSh = len; \
-	U32 hash_PeRlHaSh = (internal ? PL_rehash_seed : PERL_HASH_SEED); \
+        register const char * const s_PeRlHaSh_tmp = (str); \
+        register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+        register I32 i_PeRlHaSh = len; \
+        register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \
 	while (i_PeRlHaSh--) { \
-	    hash_PeRlHaSh += *s_PeRlHaSh++; \
+            hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \
 	    hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
 	    hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
 	} \
@@ -154,7 +630,10 @@ struct xpvhv {
 	hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
 	(hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
     } STMT_END
-
+#endif
+#ifndef PERL_HASH
+#error "No hash function defined!"
+#endif
 /*
 =head1 Hash Manipulation Functions
 
@@ -358,10 +837,6 @@ C<SV*>.
 #define HvLAZYDEL_on(hv)	(SvFLAGS(hv) |= SVphv_LAZYDEL)
 #define HvLAZYDEL_off(hv)	(SvFLAGS(hv) &= ~SVphv_LAZYDEL)
 
-#define HvREHASH(hv)		(SvFLAGS(hv) & SVphv_REHASH)
-#define HvREHASH_on(hv)		(SvFLAGS(hv) |= SVphv_REHASH)
-#define HvREHASH_off(hv)	(SvFLAGS(hv) &= ~SVphv_REHASH)
-
 #ifndef PERL_CORE
 #  define Nullhe Null(HE*)
 #endif
@@ -372,7 +847,6 @@ C<SV*>.
 #define HeKLEN(he)		HEK_LEN(HeKEY_hek(he))
 #define HeKUTF8(he)  HEK_UTF8(HeKEY_hek(he))
 #define HeKWASUTF8(he)  HEK_WASUTF8(HeKEY_hek(he))
-#define HeKREHASH(he)  HEK_REHASH(HeKEY_hek(he))
 #define HeKLEN_UTF8(he)  (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
 #define HeKFLAGS(he)  HEK_FLAGS(HeKEY_hek(he))
 #define HeVAL(he)		(he)->he_valu.hent_val
@@ -407,7 +881,6 @@ C<SV*>.
 
 #define HVhek_UTF8	0x01 /* Key is utf8 encoded. */
 #define HVhek_WASUTF8	0x02 /* Key is bytes here, but was supplied as utf8. */
-#define HVhek_REHASH	0x04 /* This key is in an hv using a custom HASH . */
 #define HVhek_UNSHARED	0x08 /* This key isn't a shared hash key. */
 #define HVhek_FREEKEY	0x100 /* Internal flag to say key is malloc()ed.  */
 #define HVhek_PLACEHOLD	0x200 /* Internal flag to create placeholder.
@@ -417,16 +890,7 @@ C<SV*>.
 				    converted to bytes. */
 #define HVhek_MASK	0xFF
 
-/* Which flags enable HvHASKFLAGS? Somewhat a hack on a hack, as
-   HVhek_REHASH is only needed because the rehash flag has to be duplicated
-   into all keys as hv_iternext has no access to the hash flags. At this
-   point Storable's tests get upset, because sometimes hashes are "keyed"
-   and sometimes not, depending on the order of data insertion, and whether
-   it triggered rehashing. So currently HVhek_REHASH is exempt.
-   Similarly UNSHARED
-*/
-   
-#define HVhek_ENABLEHVKFLAGS	(HVhek_MASK & ~(HVhek_REHASH|HVhek_UNSHARED))
+#define HVhek_ENABLEHVKFLAGS        (HVhek_MASK & ~(HVhek_UNSHARED))
 
 #define HEK_UTF8(hek)		(HEK_FLAGS(hek) & HVhek_UTF8)
 #define HEK_UTF8_on(hek)	(HEK_FLAGS(hek) |= HVhek_UTF8)
@@ -434,8 +898,6 @@ C<SV*>.
 #define HEK_WASUTF8(hek)	(HEK_FLAGS(hek) & HVhek_WASUTF8)
 #define HEK_WASUTF8_on(hek)	(HEK_FLAGS(hek) |= HVhek_WASUTF8)
 #define HEK_WASUTF8_off(hek)	(HEK_FLAGS(hek) &= ~HVhek_WASUTF8)
-#define HEK_REHASH(hek)		(HEK_FLAGS(hek) & HVhek_REHASH)
-#define HEK_REHASH_on(hek)	(HEK_FLAGS(hek) |= HVhek_REHASH)
 
 /* calculate HV array allocation */
 #ifndef PERL_USE_LARGE_HV_ALLOC
author	Yves Orton <demerphq@gmail.com>	2012-11-17 14:12:04 +0100
committer	Yves Orton <demerphq@gmail.com>	2012-11-17 19:19:43 +0100
commit	7dc8663964c66a698d31bbdc8e8abed69bddeec3 (patch)
tree	92f39aa5770c2f401bbe134db921d68cbe4f758a /hv.h
parent	867b16b50f413adfdb2addcab33ad1d6e61da9d5 (diff)
download	perl-7dc8663964c66a698d31bbdc8e8abed69bddeec3.tar.gz