Diffstat (limited to 'ext/hash/murmur/PMurHash128.c')
-rw-r--r--  ext/hash/murmur/PMurHash128.c  640
1 file changed, 640 insertions, 0 deletions
diff --git a/ext/hash/murmur/PMurHash128.c b/ext/hash/murmur/PMurHash128.c
new file mode 100644
index 0000000000..2856542190
--- /dev/null
+++ b/ext/hash/murmur/PMurHash128.c
@@ -0,0 +1,640 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This is a C implementation of MurmurHash3_128 with support for progressive
+ * processing, based on the PMurHash implementation written by Shane Day.
+ */
+
+/*-----------------------------------------------------------------------------
+
+If you want to understand the MurmurHash algorithm you would be much better
+off reading the original source. Just point your browser at:
+http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+
+
+What this version provides:
+
+1. Progressive data feeding. Useful when the entire payload to be hashed
+does not fit in memory or when the data is streamed through the application.
+Also useful when hashing a number of strings with a common prefix: a partial
+hash of the prefix can be generated once and reused for each suffix string
+(an illustrative call sequence follows this header comment).
+
+How does it work?
+
+We can only process entire 128-bit chunks of input, except for the very end,
+which may be shorter. So along with the partial hash we need to give back to
+the caller a carry containing up to 15 bytes that we were unable to process.
+This carry also needs to record the number of bytes it holds. I use the low
+4 bits as a count (0..15), and the carry bytes are shifted in at the high
+byte in stream order.
+
+To handle endianness I simply use a macro that reads an unsigned int and
+define that macro to be a direct read on little-endian machines and a read
+and swap on big-endian machines.
+
+-----------------------------------------------------------------------------*/
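+
+/* Illustrative call sequence: one way a caller might stream data through the
+ * x64 variant defined below. read_chunk(), buf, n, seed and out are
+ * placeholders, not part of this file's API:
+ *
+ *   uint64_t h[2]     = {seed, seed};   // running hash state
+ *   uint64_t carry[2] = {0, 0};         // unprocessed tail bytes + count
+ *   uint32_t total    = 0;
+ *   while ((n = read_chunk(buf, sizeof buf)) > 0) {
+ *       PMurHash128x64_Process(h, carry, buf, (int)n);
+ *       total += (uint32_t)n;
+ *   }
+ *   PMurHash128x64_Result(h, carry, total, out);  // out[2] receives the hash
+ */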
+
+
+#include "PMurHash128.h"
+
+/*-----------------------------------------------------------------------------
+ * Endianness, misalignment capabilities and util macros
+ *
+ * The following macros are defined in this section. The other macros defined
+ * are only needed to help derive these.
+ *
+ * READ_UINT32(x,i) Read a little endian unsigned 32-bit int at index
+ * READ_UINT64(x,i) Read a little endian unsigned 64-bit int at index
+ * UNALIGNED_SAFE Defined if READ_UINTXX works on non-word boundaries
+ * ROTL32(x,r) Rotate x left by r bits
+ * ROTL64(x,r) Rotate x left by r bits
+ * BIG_CONSTANT(x)  Mark a 64-bit integer literal as unsigned long long
+ * FORCE_INLINE     Compiler-specific qualifier to force a function inline
+ */
+
+/* I386 or AMD64 */
+#if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \
+ || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
+ #define UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL */
+#if defined(_MSC_VER)
+ #define FORCE_INLINE static __forceinline
+ #include <stdlib.h> /* Microsoft put _rotl declaration in here */
+ #define ROTL32(x,y) _rotl(x,y)
+ #define ROTL64(x,y) _rotl64(x,y)
+ #define BIG_CONSTANT(x) (x)
+#else
+ #define FORCE_INLINE static inline __attribute__((always_inline))
+ /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+ #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
+ #define ROTL64(x,r) (((uint64_t)x << r) | ((uint64_t)x >> (64 - r)))
+ #define BIG_CONSTANT(x) (x##LLU)
+#endif
+
+#include "endianness.h"
+
+#define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i)
+#define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i)
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix32 ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+{
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+/*-----------------------------------------------------------------------------*
+ PMurHash128x86
+ *-----------------------------------------------------------------------------*/
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+static const uint32_t kC1 = 0x239b961b;
+static const uint32_t kC2 = 0xab0e9789;
+static const uint32_t kC3 = 0x38b34ae5;
+static const uint32_t kC4 = 0xa1e38b93;
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 128-bits of input. */
+#define doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4)\
+do {\
+ k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
+\
+ h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
+\
+ k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
+\
+ h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
+\
+ k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
+\
+ h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
+\
+ k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
+\
+ h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
+} while(0)
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
+/* cnt=bytes to process, h1-h4=hash k1-k4=carry, n=bytes in carry, ptr/len=payload */
+#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
+do {\
+ unsigned __cnt = cnt;\
+ for(;__cnt--; len--) {\
+ switch(n) {\
+ case 0: case 1: case 2: case 3:\
+ k1 = k1>>8 | (uint32_t)*ptr++<<24;\
+ ++n; break;\
+\
+ case 4: case 5: case 6: case 7:\
+ k2 = k2>>8 | (uint32_t)*ptr++<<24;\
+ ++n; break;\
+\
+ case 8: case 9: case 10: case 11:\
+ k3 = k3>>8 | (uint32_t)*ptr++<<24;\
+ ++n; break;\
+\
+ case 12: case 13: case 14:\
+ k4 = k4>>8 | (uint32_t)*ptr++<<24;\
+ ++n; break;\
+\
+ case 15:\
+ k4 = k4>>8 | (uint32_t)*ptr++<<24;\
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
+ n = 0; break;\
+ }\
+ }\
+} while(0)
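+
+/* Worked illustration of the byte packing above: starting from an empty carry
+ * (n = 0) and appending the three bytes 0x01 0x02 0x03 gives
+ *
+ *   k1 = 0x03020100, n = 3
+ *
+ * because each byte enters at the high end (k1 = k1>>8 | byte<<24). At
+ * finalization k1 is shifted down by (4-n)*8 = 8 bits to 0x00030201, the same
+ * tail value the reference MurmurHash3_x86_128 builds as
+ * tail[2]<<16 | tail[1]<<8 | tail[0]. */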
+
+/* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */
+void PMurHash128x86_Result(const uint32_t *ph, const uint32_t *pcarry, uint32_t total_length, uint32_t *out)
+{
+ uint32_t h1 = ph[0];
+ uint32_t h2 = ph[1];
+ uint32_t h3 = ph[2];
+ uint32_t h4 = ph[3];
+
+ uint32_t k1, k2, k3, k4 = pcarry[3];
+
+ int n = k4 & 15;
+ switch(n) {
+ case 1: case 2: case 3: case 4:
+ k1 = pcarry[0] >> (4-n)*8;
+ goto finrot_k1;
+
+ case 5: case 6: case 7: case 8:
+ k2 = pcarry[1] >> (8-n)*8;
+ goto finrot_k21;
+
+ case 9: case 10: case 11: case 12:
+ k3 = pcarry[2] >> (12-n)*8;
+ goto finrot_k321;
+
+ case 13: case 14: case 15:
+ k4 >>= (16-n)*8;
+ goto finrot_k4321;
+
+ default:
+ goto skiprot;
+ }
+finrot_k4321:
+ k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
+ k3 = pcarry[2];
+finrot_k321:
+ k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
+ k2 = pcarry[1];
+finrot_k21:
+ k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
+ k1 = pcarry[0];
+finrot_k1:
+ k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
+skiprot:
+
+ //----------
+ // finalization
+
+ h1 ^= total_length; h2 ^= total_length;
+ h3 ^= total_length; h4 ^= total_length;
+
+ h1 += h2; h1 += h3; h1 += h4;
+ h2 += h1; h3 += h1; h4 += h1;
+
+ h1 = fmix32(h1);
+ h2 = fmix32(h2);
+ h3 = fmix32(h3);
+ h4 = fmix32(h4);
+
+ h1 += h2; h1 += h3; h1 += h4;
+ h2 += h1; h3 += h1; h4 += h1;
+
+ out[0] = h1;
+ out[1] = h2;
+ out[2] = h3;
+ out[3] = h4;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed}
+ * if wanted. Both ph and pcarry are required arguments. */
+void PMurHash128x86_Process(uint32_t * const ph, uint32_t * const pcarry, const void * const key, int len)
+{
+ uint32_t h1 = ph[0];
+ uint32_t h2 = ph[1];
+ uint32_t h3 = ph[2];
+ uint32_t h4 = ph[3];
+
+ uint32_t k1 = pcarry[0];
+ uint32_t k2 = pcarry[1];
+ uint32_t k3 = pcarry[2];
+ uint32_t k4 = pcarry[3];
+
+ const uint8_t *ptr = (uint8_t*)key;
+ const uint8_t *end;
+
+ /* Extract carry count from low 4 bits of c value */
+ int n = k4 & 15;
+
+#if defined(UNALIGNED_SAFE)
+ /* This CPU handles unaligned word access */
+// #pragma message ( "UNALIGNED_SAFE" )
+ /* Consume any carry bytes */
+ int i = (16-n) & 15;
+ if(i && i <= len) {
+ dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
+ }
+
+ /* Process 128-bit chunks */
+ end = ptr + (len & ~15);
+ for( ; ptr < end ; ptr+=16) {
+ k1 = READ_UINT32(ptr, 0);
+ k2 = READ_UINT32(ptr, 1);
+ k3 = READ_UINT32(ptr, 2);
+ k4 = READ_UINT32(ptr, 3);
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
+ }
+
+#else /*UNALIGNED_SAFE*/
+ /* This CPU does not handle unaligned word access */
+// #pragma message ( "ALIGNED" )
+ /* Consume enough so that the next data byte is word aligned */
+ int i = -(intptr_t)(void *)ptr & 3;
+ if(i && i <= len) {
+ dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
+ }
+ /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+ end = ptr + (len & ~15);
+
+ switch(n) { /* how many bytes in c */
+ case 0: /*
+ k1=[----] k2=[----] k3=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
+ for( ; ptr < end ; ptr+=16) {
+ k1 = READ_UINT32(ptr, 0);
+ k2 = READ_UINT32(ptr, 1);
+ k3 = READ_UINT32(ptr, 2);
+ k4 = READ_UINT32(ptr, 3);
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
+ }
+ break;
+ case 1: case 2: case 3: /*
+ k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
+ {
+ const int lshift = n*8, rshift = 32-lshift;
+ for( ; ptr < end ; ptr+=16) {
+ uint32_t c = k1>>rshift; // --10
+ k2 = READ_UINT32(ptr, 0); // 5432
+ c |= k2<<lshift; // 3210.
+ k1 = READ_UINT32(ptr, 1); // 9876
+ k2 = k1<<lshift | k2>>rshift; // 7654.
+ k4 = READ_UINT32(ptr, 2); // dcba
+ k3 = k4<<lshift | k1>>rshift; // ba98.
+ k1 = READ_UINT32(ptr, 3); // hgfe.
+ k4 = k1<<lshift | k4>>rshift; // fedc.
+ doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
+ }
+ }
+ break;
+ case 4: /*
+ k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
+ for( ; ptr < end ; ptr+=16) {
+ k2 = READ_UINT32(ptr, 0);
+ k3 = READ_UINT32(ptr, 1);
+ k4 = READ_UINT32(ptr, 2);
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
+ k1 = READ_UINT32(ptr, 3);
+ }
+ break;
+ case 5: case 6: case 7: /*
+ k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
+ {
+ const int lshift = n*8-32, rshift = 32-lshift;
+ for( ; ptr < end ; ptr+=16) {
+ uint32_t c = k2>>rshift; // --54
+ k3 = READ_UINT32(ptr, 0); // 9876
+ c |= k3<<lshift; // 7654.
+ k4 = READ_UINT32(ptr, 1); // dcba
+ k3 = k4<<lshift | k3>>rshift; // ba98.
+ k2 = READ_UINT32(ptr, 2); // hgfe
+ k4 = k2<<lshift | k4>>rshift; // fedc.
+ doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
+ k1 = k2>>rshift; // --hg
+ k2 = READ_UINT32(ptr, 3); // lkji.
+ k1 |= k2<<lshift; // jihg.
+ }
+ }
+ break;
+ case 8: /*
+ k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
+ for( ; ptr < end ; ptr+=16) {
+ k3 = READ_UINT32(ptr, 0);
+ k4 = READ_UINT32(ptr, 1);
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
+ k1 = READ_UINT32(ptr, 2);
+ k2 = READ_UINT32(ptr, 3);
+ }
+ break;
+ case 9: case 10: case 11: /*
+ k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
+ {
+ const int lshift = n*8-64, rshift = 32-lshift;
+ for( ; ptr < end ; ptr+=16) {
+ uint32_t c = k3>>rshift; // --98
+ k4 = READ_UINT32(ptr, 0); // dcba
+ c |= k4<<lshift; // ba98.
+ k3 = READ_UINT32(ptr, 1); // hgfe
+ k4 = k3<<lshift | k4>>rshift; // fedc.
+ doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
+ k2 = READ_UINT32(ptr, 2); // lkji
+ k1 = k2<<lshift | k3>>rshift; // jihg.
+ k3 = READ_UINT32(ptr, 3); // ponm.
+ k2 = k3<<lshift | k2>>rshift; // nmlk.
+ }
+ }
+ break;
+ case 12: /*
+ k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
+ for( ; ptr < end ; ptr+=16) {
+ k4 = READ_UINT32(ptr, 0);
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
+ k1 = READ_UINT32(ptr, 1);
+ k2 = READ_UINT32(ptr, 2);
+ k3 = READ_UINT32(ptr, 3);
+ }
+ break;
+ default: /* 12 < n <= 15
+ k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k4'=[ts--] */
+ {
+ const int lshift = n*8-96, rshift = 32-lshift;
+ for( ; ptr < end ; ptr+=16) {
+ uint32_t c = k4>>rshift; // --dc
+ k4 = READ_UINT32(ptr, 0); // hgfe
+ c |= k4<<lshift; // fedc.
+ doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
+ k3 = READ_UINT32(ptr, 1); // lkji
+ k1 = k3<<lshift | k4>>rshift; // jihg.
+ c = READ_UINT32(ptr, 2); // ponm
+ k2 = c<<lshift | k3>>rshift; // nmlk.
+ k4 = READ_UINT32(ptr, 3); // tsrq.
+ k3 = k4<<lshift | c>>rshift; // rqpo.
+ }
+ }
+ }
+#endif /*UNALIGNED_SAFE*/
+
+ /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
+ len -= len & ~15;
+
+ /* Append any remaining bytes into carry */
+ dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
+
+ /* Copy out new running hash and carry */
+ ph[0] = h1;
+ ph[1] = h2;
+ ph[2] = h3;
+ ph[3] = h4;
+ pcarry[0] = k1;
+ pcarry[1] = k2;
+ pcarry[2] = k3;
+ pcarry[3] = (k4 & ~0xff) | n;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* All in one go */
+
+/* MurmurHash3_x86_128 api */
+void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
+{
+ uint32_t carry[4] = {0, 0, 0, 0};
+ uint32_t h[4] = {seed, seed, seed, seed};
+ PMurHash128x86_Process(h, carry, key, len);
+ PMurHash128x86_Result(h, carry, (uint32_t) len, (uint32_t *) out);
+}
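+
+/* Illustrative sketch of the common-prefix reuse mentioned in the header:
+ * snapshot the running state after the prefix, then finish each suffix from
+ * a copy of that snapshot. prefix, suffix[i] and the length variables are
+ * placeholders, not part of this file's API:
+ *
+ *   uint32_t h0[4] = {seed, seed, seed, seed}, c0[4] = {0, 0, 0, 0};
+ *   PMurHash128x86_Process(h0, c0, prefix, prefix_len);
+ *   for (i = 0; i < nsuffixes; i++) {
+ *       uint32_t h[4], c[4], out[4];
+ *       memcpy(h, h0, sizeof h); memcpy(c, c0, sizeof c);
+ *       PMurHash128x86_Process(h, c, suffix[i], suffix_len[i]);
+ *       PMurHash128x86_Result(h, c, prefix_len + suffix_len[i], out);
+ *   }
+ */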
+
+/*-----------------------------------------------------------------------------*
+ PMurHash128x64
+ *-----------------------------------------------------------------------------*/
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
+static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 128-bits of input. */
+#define doblock128x64(h1, h2, k1, k2)\
+do {\
+ k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
+\
+ h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
+\
+ k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
+\
+ h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
+} while(0)
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
+/* cnt=bytes to process, h1,h2=hash k1,k2=carry, n=bytes in carry, ptr/len=payload */
+#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
+do {\
+ unsigned __cnt = cnt;\
+ for(;__cnt--; len--) {\
+ switch(n) {\
+ case 0: case 1: case 2: case 3:\
+ case 4: case 5: case 6: case 7:\
+ k1 = k1>>8 | (uint64_t)*ptr++<<56;\
+ n++; break;\
+\
+ case 8: case 9: case 10: case 11:\
+ case 12: case 13: case 14:\
+ k2 = k2>>8 | (uint64_t)*ptr++<<56;\
+ n++; break;\
+\
+ case 15:\
+ k2 = k2>>8 | (uint64_t)*ptr++<<56;\
+ doblock128x64(h1, h2, k1, k2);\
+ n = 0; break;\
+ }\
+ }\
+} while(0)
+
+/* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */
+void PMurHash128x64_Result(const uint64_t * const ph, const uint64_t * const pcarry,
+ const uint32_t total_length, uint64_t * const out)
+{
+ uint64_t h1 = ph[0];
+ uint64_t h2 = ph[1];
+
+ uint64_t k1;
+ uint64_t k2 = pcarry[1];
+
+ int n = k2 & 15;
+ if (n) {
+ k1 = pcarry[0];
+ if (n > 8) {
+ k2 >>= (16-n)*8;
+ k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;
+ } else {
+ k1 >>= (8-n)*8;
+ }
+ k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;
+ }
+
+ //----------
+ // finalization
+
+ h1 ^= total_length; h2 ^= total_length;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ out[0] = h1;
+ out[1] = h2;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed}
+ * if wanted. Both ph and pcarry are required arguments. */
+void PMurHash128x64_Process(uint64_t * const ph, uint64_t * const pcarry, const void * const key, int len)
+{
+ uint64_t h1 = ph[0];
+ uint64_t h2 = ph[1];
+
+ uint64_t k1 = pcarry[0];
+ uint64_t k2 = pcarry[1];
+
+ const uint8_t *ptr = (uint8_t*)key;
+ const uint8_t *end;
+
+ /* Extract carry count from low 4 bits of c value */
+ int n = k2 & 15;
+
+#if defined(UNALIGNED_SAFE)
+ /* This CPU handles unaligned word access */
+// #pragma message ( "UNALIGNED_SAFE" )
+ /* Consume any carry bytes */
+ int i = (16-n) & 15;
+ if(i && i <= len) {
+ dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
+ }
+
+ /* Process 128-bit chunks */
+ end = ptr + (len & ~15);
+ for( ; ptr < end ; ptr+=16) {
+ k1 = READ_UINT64(ptr, 0);
+ k2 = READ_UINT64(ptr, 1);
+ doblock128x64(h1, h2, k1, k2);
+ }
+
+#else /*UNALIGNED_SAFE*/
+ /* This CPU does not handle unaligned word access */
+// #pragma message ( "ALIGNED" )
+ /* Consume enough so that the next data byte is word aligned */
+ int i = -(intptr_t)(void *)ptr & 7;
+ if(i && i <= len) {
+ dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
+ }
+ /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+ end = ptr + (len & ~15);
+
+ switch(n) { /* how many bytes in c */
+ case 0: /*
+ k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
+ for( ; ptr < end ; ptr+=16) {
+ k1 = READ_UINT64(ptr, 0);
+ k2 = READ_UINT64(ptr, 1);
+ doblock128x64(h1, h2, k1, k2);
+ }
+ break;
+ case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
+ k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
+ {
+ const int lshift = n*8, rshift = 64-lshift;
+ for( ; ptr < end ; ptr+=16) {
+ uint64_t c = k1>>rshift;
+ k2 = READ_UINT64(ptr, 0);
+ c |= k2<<lshift;
+ k1 = READ_UINT64(ptr, 1);
+ k2 = k2>>rshift | k1<<lshift;
+ doblock128x64(h1, h2, c, k2);
+ }
+ }
+ break;
+ case 8: /*
+ k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1'=[nmlkjihg] */
+ for( ; ptr < end ; ptr+=16) {
+ k2 = READ_UINT64(ptr, 0);
+ doblock128x64(h1, h2, k1, k2);
+ k1 = READ_UINT64(ptr, 1);
+ }
+ break;
+ default: /* 8 < n <= 15
+ k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1'=[nmlkjihg] k2'=[po------] */
+ {
+ const int lshift = n*8-64, rshift = 64-lshift;
+ for( ; ptr < end ; ptr+=16) {
+ uint64_t c = k2 >> rshift;
+ k2 = READ_UINT64(ptr, 0);
+ c |= k2 << lshift;
+ doblock128x64(h1, h2, k1, c);
+ k1 = k2 >> rshift;
+ k2 = READ_UINT64(ptr, 1);
+ k1 |= k2 << lshift;
+ }
+ }
+ }
+#endif /*UNALIGNED_SAFE*/
+
+ /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
+ len -= len & ~15;
+
+ /* Append any remaining bytes into carry */
+ dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);
+
+ /* Copy out new running hash and carry */
+ ph[0] = h1;
+ ph[1] = h2;
+ pcarry[0] = k1;
+ pcarry[1] = (k2 & ~0xff) | n;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* All in one go */
+
+/* MurmurHash3_x64_128 api */
+void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
+{
+ uint64_t carry[2] = {0, 0};
+ uint64_t h[2] = {seed, seed};
+ PMurHash128x64_Process(h, carry, key, len);
+ PMurHash128x64_Result(h, carry, (uint32_t) len, (uint64_t *) out);
+}
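+
+/* Illustrative equivalence sketch: feeding the same buffer in chunks must
+ * give the same digest as the one-shot wrapper above. buf, len and seed are
+ * placeholders, and len >= 7 is assumed:
+ *
+ *   uint64_t one[2], two[2], h[2] = {seed, seed}, carry[2] = {0, 0};
+ *   PMurHash128x64(buf, len, seed, one);
+ *   PMurHash128x64_Process(h, carry, buf, 7);
+ *   PMurHash128x64_Process(h, carry, (const char *)buf + 7, len - 7);
+ *   PMurHash128x64_Result(h, carry, (uint32_t)len, two);
+ *   // now one[0] == two[0] && one[1] == two[1]
+ */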