diff options
Diffstat (limited to 'ext/hash/murmur/PMurHash128.c')
-rw-r--r-- | ext/hash/murmur/PMurHash128.c | 640 |
1 file changed, 640 insertions, 0 deletions
diff --git a/ext/hash/murmur/PMurHash128.c b/ext/hash/murmur/PMurHash128.c new file mode 100644 index 0000000000..2856542190 --- /dev/null +++ b/ext/hash/murmur/PMurHash128.c @@ -0,0 +1,640 @@ +/*----------------------------------------------------------------------------- + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. + * + * This is a c++ implementation of MurmurHash3_128 with support for progressive + * processing based on PMurHash implementation written by Shane Day. + */ + +/*----------------------------------------------------------------------------- + +If you want to understand the MurmurHash algorithm you would be much better +off reading the original source. Just point your browser at: +http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + + +What this version provides? + +1. Progressive data feeding. Useful when the entire payload to be hashed +does not fit in memory or when the data is streamed through the application. +Also useful when hashing a number of strings with a common prefix. A partial +hash of a prefix string can be generated and reused for each suffix string. + +How does it work? + +We can only process entire 128 bit chunks of input, except for the very end +that may be shorter. So along with the partial hash we need to give back to +the caller a carry containing up to 15 bytes that we were unable to process. +This carry also needs to record the number of bytes the carry holds. I use +the low 4 bits as a count (0..15) and the carry bytes are shifted into the +high byte in stream order. + +To handle endianess I simply use a macro that reads an uint and define +that macro to be a direct read on little endian machines, a read and swap +on big endian machines. 
+ +-----------------------------------------------------------------------------*/ + + +#include "PMurHash128.h" + +/*----------------------------------------------------------------------------- + * Endianess, misalignment capabilities and util macros + * + * The following 5 macros are defined in this section. The other macros defined + * are only needed to help derive these 5. + * + * READ_UINT32(x,i) Read a little endian unsigned 32-bit int at index + * READ_UINT64(x,i) Read a little endian unsigned 64-bit int at index + * UNALIGNED_SAFE Defined if READ_UINTXX works on non-word boundaries + * ROTL32(x,r) Rotate x left by r bits + * ROTL64(x,r) Rotate x left by r bits + * BIG_CONSTANT + * FORCE_INLINE + */ + +/* I386 or AMD64 */ +#if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \ + || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) + #define UNALIGNED_SAFE +#endif + +/* Find best way to ROTL */ +#if defined(_MSC_VER) + #define FORCE_INLINE static __forceinline + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ + #define ROTL32(x,y) _rotl(x,y) + #define ROTL64(x,y) _rotl64(x,y) + #define BIG_CONSTANT(x) (x) +#else + #define FORCE_INLINE static inline __attribute__((always_inline)) + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) + #define ROTL64(x,r) (((uint64_t)x << r) | ((uint64_t)x >> (64 - r))) + #define BIG_CONSTANT(x) (x##LLU) +#endif + +#include "endianness.h" + +#define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i) +#define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i) + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h 
>> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +/*-----------------------------------------------------------------------------* + PMurHash128x86 + *-----------------------------------------------------------------------------*/ +/*----------------------------------------------------------------------------- + * Core murmurhash algorithm macros */ + +static const uint32_t kC1 = 0x239b961b; +static const uint32_t kC2 = 0xab0e9789; +static const uint32_t kC3 = 0x38b34ae5; +static const uint32_t kC4 = 0xa1e38b93; + +/* This is the main processing body of the algorithm. It operates + * on each full 128-bits of input. */ +#define doblock128x86(h1, h2, h3, h4, k1, k2, k3,k4)\ +do {\ + k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\ +\ + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\ +\ + k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\ +\ + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\ +\ + k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\ +\ + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\ +\ + k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\ +\ + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\ +} while(0) + +/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */ +/* cnt=bytes to process, h1-h4=hash k1-k4=carry, n=bytes in carry, ptr/len=payload */ +#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\ +do {\ + unsigned __cnt = cnt;\ + for(;__cnt--; len--) {\ + switch(n) {\ + case 0: case 1: case 2: case 3:\ + k1 = k1>>8 | (uint32_t)*ptr++<<24;\ + ++n; break;\ +\ + case 4: case 5: case 6: case 7:\ + k2 = k2>>8 | (uint32_t)*ptr++<<24;\ + ++n; break;\ +\ + case 8: case 9: case 10: case 11:\ + k3 = k3>>8 | (uint32_t)*ptr++<<24;\ + ++n; break;\ +\ + case 12: case 13: 
case 14:\ + k4 = k4>>8 | (uint32_t)*ptr++<<24;\ + ++n; break;\ +\ + case 15:\ + k4 = k4>>8 | (uint32_t)*ptr++<<24;\ + doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\ + n = 0; break;\ + }\ + }\ +} while(0) + +/* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */ +void PMurHash128x86_Result(const uint32_t *ph, const uint32_t *pcarry, uint32_t total_length, uint32_t *out) +{ + uint32_t h1 = ph[0]; + uint32_t h2 = ph[1]; + uint32_t h3 = ph[2]; + uint32_t h4 = ph[3]; + + uint32_t k1, k2, k3, k4 = pcarry[3]; + + int n = k4 & 15; + switch(n) { + case 1: case 2: case 3: case 4: + k1 = pcarry[0] >> (4-n)*8; + goto finrot_k1; + + case 5: case 6: case 7: case 8: + k2 = pcarry[1] >> (8-n)*8; + goto finrot_k21; + + case 9: case 10: case 11: case 12: + k3 = pcarry[2] >> (12-n)*8; + goto finrot_k321; + + case 13: case 14: case 15: + k4 >>= (16-n)*8; + goto finrot_k4321; + + default: + goto skiprot; + } +finrot_k4321: + k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4; + k3 = pcarry[2]; +finrot_k321: + k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3; + k2 = pcarry[1]; +finrot_k21: + k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2; + k1 = pcarry[0]; +finrot_k1: + k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1; +skiprot: + + //---------- + // finalization + + h1 ^= total_length; h2 ^= total_length; + h3 ^= total_length; h4 ^= total_length; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + out[0] = h1; + out[1] = h2; + out[2] = h3; + out[3] = h4; +} + +/*---------------------------------------------------------------------------*/ + +/* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed} + * if wanted. Both ph and pcarry are required arguments. 
*/ +void PMurHash128x86_Process(uint32_t * const ph, uint32_t * const pcarry, const void * const key, int len) +{ + uint32_t h1 = ph[0]; + uint32_t h2 = ph[1]; + uint32_t h3 = ph[2]; + uint32_t h4 = ph[3]; + + uint32_t k1 = pcarry[0]; + uint32_t k2 = pcarry[1]; + uint32_t k3 = pcarry[2]; + uint32_t k4 = pcarry[3]; + + const uint8_t *ptr = (uint8_t*)key; + const uint8_t *end; + + /* Extract carry count from low 4 bits of c value */ + int n = k4 & 15; + +#if defined(UNALIGNED_SAFE) + /* This CPU handles unaligned word access */ +// #pragma message ( "UNALIGNED_SAFE" ) + /* Consume any carry bytes */ + int i = (16-n) & 15; + if(i && i <= len) { + dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len); + } + + /* Process 128-bit chunks */ + end = ptr + (len & ~15); + for( ; ptr < end ; ptr+=16) { + k1 = READ_UINT32(ptr, 0); + k2 = READ_UINT32(ptr, 1); + k3 = READ_UINT32(ptr, 2); + k4 = READ_UINT32(ptr, 3); + doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4); + } + +#else /*UNALIGNED_SAFE*/ + /* This CPU does not handle unaligned word access */ +// #pragma message ( "ALIGNED" ) + /* Consume enough so that the next data byte is word aligned */ + int i = -(intptr_t)(void *)ptr & 3; + if(i && i <= len) { + dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len); + } + /* We're now aligned. Process in aligned blocks. 
Specialise for each possible carry count */ + end = ptr + (len & ~15); + + switch(n) { /* how many bytes in c */ + case 0: /* + k1=[----] k2=[----] k2=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */ + for( ; ptr < end ; ptr+=16) { + k1 = READ_UINT32(ptr, 0); + k2 = READ_UINT32(ptr, 1); + k3 = READ_UINT32(ptr, 2); + k4 = READ_UINT32(ptr, 3); + doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4); + } + break; + case 1: case 2: case 3: /* + k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */ + { + const int lshift = n*8, rshift = 32-lshift; + for( ; ptr < end ; ptr+=16) { + uint32_t c = k1>>rshift; // --10 + k2 = READ_UINT32(ptr, 0); // 5432 + c |= k2<<lshift; // 3210. + k1 = READ_UINT32(ptr, 1); // 9876 + k2 = k1<<lshift | k2>>rshift; // 7654. + k4 = READ_UINT32(ptr, 2); // dcba + k3 = k4<<lshift | k1>>rshift; // ba98. + k1 = READ_UINT32(ptr, 3); // hgfe. + k4 = k1<<lshift | k4>>rshift; // fedc. + doblock128x86(h1, h2, h3, h4, c, k2, k3, k4); + } + } + break; + case 4: /* + k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */ + for( ; ptr < end ; ptr+=16) { + k2 = READ_UINT32(ptr, 0); + k3 = READ_UINT32(ptr, 1); + k4 = READ_UINT32(ptr, 2); + doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4); + k1 = READ_UINT32(ptr, 3); + } + break; + case 5: case 6: case 7: /* + k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */ + { + const int lshift = n*8-32, rshift = 32-lshift; + for( ; ptr < end ; ptr+=16) { + uint32_t c = k2>>rshift; // --54 + k3 = READ_UINT32(ptr, 0); // 9876 + c |= k3<<lshift; // 7654. + k4 = READ_UINT32(ptr, 1); // dcba + k3 = k4<<lshift | k3>>rshift; // ba98. + k2 = READ_UINT32(ptr, 2); // hgfe + k4 = k2<<lshift | k4>>rshift; // fedc. + doblock128x86(h1, h2, h3, h4, k1, c, k3, k4); + k1 = k2>>rshift; // --hg + k2 = READ_UINT32(ptr, 3); // lkji. + k1 |= k2<<lshift; // jihg. 
+ } + } + case 8: /* + k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */ + for( ; ptr < end ; ptr+=16) { + k3 = READ_UINT32(ptr, 0); + k4 = READ_UINT32(ptr, 1); + doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4); + k1 = READ_UINT32(ptr, 2); + k2 = READ_UINT32(ptr, 3); + } + break; + case 9: case 10: case 11: /* + k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */ + { + const int lshift = n*8-64, rshift = 32-lshift; + for( ; ptr < end ; ptr+=16) { + uint32_t c = k3>>rshift; // --98 + k4 = READ_UINT32(ptr, 0); // dcba + c |= k4<<lshift; // ba98. + k3 = READ_UINT32(ptr, 1); // hgfe + k4 = k3<<lshift | k4>>rshift; // fedc. + doblock128x86(h1, h2, h3, h4, k1, k2, c, k4); + k2 = READ_UINT32(ptr, 2); // lkji + k1 = k2<<lshift | k3>>rshift; // jihg. + k3 = READ_UINT32(ptr, 3); // ponm. + k2 = k3<<lshift | k2>>rshift; // nmlk. + } + } + case 12: /* + k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */ + for( ; ptr < end ; ptr+=16) { + k4 = READ_UINT32(ptr, 0); + doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4); + k1 = READ_UINT32(ptr, 1); + k2 = READ_UINT32(ptr, 2); + k3 = READ_UINT32(ptr, 3); + } + break; + default: /* 12 < n <= 15 + k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k3'=[ts--] */ + { + const int lshift = n*8-96, rshift = 32-lshift; + for( ; ptr < end ; ptr+=16) { + uint32_t c = k4>>rshift; // --dc + k4 = READ_UINT32(ptr, 0); // hgfe + c |= k4<<lshift; // fedc. + doblock128x86(h1, h2, h3, h4, k1, k2, k3, c); + k3 = READ_UINT32(ptr, 1); // lkji + k1 = k3<<lshift | k4>>rshift; // jihg. + c = READ_UINT32(ptr, 2); // ponm + k2 = c<<lshift | k3>>rshift; // nmlk. + k4 = READ_UINT32(ptr, 3); // tsrq. + k3 = k4<<lshift | c>>rshift; // rqpo. 
+ } + } + } +#endif /*UNALIGNED_SAFE*/ + + /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */ + len -= len & ~15; + + /* Append any remaining bytes into carry */ + dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len); + + /* Copy out new running hash and carry */ + ph[0] = h1; + ph[1] = h2; + ph[2] = h3; + ph[3] = h4; + pcarry[0] = k1; + pcarry[1] = k2; + pcarry[2] = k3; + pcarry[3] = (k4 & ~0xff) | n; +} + +/*---------------------------------------------------------------------------*/ + +/* All in one go */ + +/* MurmurHash3_x86_128 api */ +void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out) +{ + uint32_t carry[4] = {0, 0, 0, 0}; + uint32_t h[4] = {seed, seed, seed, seed}; + PMurHash128x86_Process(h, carry, key, len); + PMurHash128x86_Result(h, carry, (uint32_t) len, (uint32_t *) out); +} + +/*-----------------------------------------------------------------------------* + PMurHash128x64 + *-----------------------------------------------------------------------------*/ +/*----------------------------------------------------------------------------- + * Core murmurhash algorithm macros */ + +static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5); +static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f); + +/* This is the main processing body of the algorithm. It operates + * on each full 128-bits of input. 
*/ +#define doblock128x64(h1, h2, k1, k2)\ +do {\ + k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\ +\ + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\ +\ + k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\ +\ + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\ +} while(0) + +/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */ +/* cnt=bytes to process, h1,h2=hash k1,k2=carry, n=bytes in carry, ptr/len=payload */ +#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \ +do {\ + unsigned __cnt = cnt;\ + for(;__cnt--; len--) {\ + switch(n) {\ + case 0: case 1: case 2: case 3:\ + case 4: case 5: case 6: case 7:\ + k1 = k1>>8 | (uint64_t)*ptr++<<56;\ + n++; break;\ +\ + case 8: case 9: case 10: case 11:\ + case 12: case 13: case 14:\ + k2 = k2>>8 | (uint64_t)*ptr++<<56;\ + n++; break;\ +\ + case 15:\ + k2 = k2>>8 | (uint64_t)*ptr++<<56;\ + doblock128x64(h1, h2, k1, k2);\ + n = 0; break;\ + }\ + }\ +} while(0) + +/* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */ +void PMurHash128x64_Result(const uint64_t * const ph, const uint64_t * const pcarry, + const uint32_t total_length, uint64_t * const out) +{ + uint64_t h1 = ph[0]; + uint64_t h2 = ph[1]; + + uint64_t k1; + uint64_t k2 = pcarry[1]; + + int n = k2 & 15; + if (n) { + k1 = pcarry[0]; + if (n > 8) { + k2 >>= (16-n)*8; + k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2; + } else { + k1 >>= (8-n)*8; + } + k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1; + } + + //---------- + // finalization + + h1 ^= total_length; h2 ^= total_length; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + out[0] = h1; + out[1] = h2; +} + +/*---------------------------------------------------------------------------*/ + +/* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed} + * if wanted. Both ph and pcarry are required arguments. 
*/ +void PMurHash128x64_Process(uint64_t * const ph, uint64_t * const pcarry, const void * const key, int len) +{ + uint64_t h1 = ph[0]; + uint64_t h2 = ph[1]; + + uint64_t k1 = pcarry[0]; + uint64_t k2 = pcarry[1]; + + const uint8_t *ptr = (uint8_t*)key; + const uint8_t *end; + + /* Extract carry count from low 4 bits of c value */ + int n = k2 & 15; + +#if defined(UNALIGNED_SAFE) + /* This CPU handles unaligned word access */ +// #pragma message ( "UNALIGNED_SAFE" ) + /* Consume any carry bytes */ + int i = (16-n) & 15; + if(i && i <= len) { + dobytes128x64(i, h1, h2, k1, k2, n, ptr, len); + } + + /* Process 128-bit chunks */ + end = ptr + (len & ~15); + for( ; ptr < end ; ptr+=16) { + k1 = READ_UINT64(ptr, 0); + k2 = READ_UINT64(ptr, 1); + doblock128x64(h1, h2, k1, k2); + } + +#else /*UNALIGNED_SAFE*/ + /* This CPU does not handle unaligned word access */ +// #pragma message ( "ALIGNED" ) + /* Consume enough so that the next data byte is word aligned */ + int i = -(intptr_t)(void *)ptr & 7; + if(i && i <= len) { + dobytes128x64(i, h1, h2, k1, k2, n, ptr, len); + } + /* We're now aligned. Process in aligned blocks. 
Specialise for each possible carry count */ + end = ptr + (len & ~15); + + switch(n) { /* how many bytes in c */ + case 0: /* + k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */ + for( ; ptr < end ; ptr+=16) { + k1 = READ_UINT64(ptr, 0); + k2 = READ_UINT64(ptr, 1); + doblock128x64(h1, h2, k1, k2); + } + break; + case 1: case 2: case 3: case 4: case 5: case 6: case 7: /* + k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */ + { + const int lshift = n*8, rshift = 64-lshift; + for( ; ptr < end ; ptr+=16) { + uint64_t c = k1>>rshift; + k2 = READ_UINT64(ptr, 0); + c |= k2<<lshift; + k1 = READ_UINT64(ptr, 1); + k2 = k2>>rshift | k1<<lshift; + doblock128x64(h1, h2, c, k2); + } + } + break; + case 8: /* + k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1`=[nmlkjihg] */ + for( ; ptr < end ; ptr+=16) { + k2 = READ_UINT64(ptr, 0); + doblock128x64(h1, h2, k1, k2); + k1 = READ_UINT64(ptr, 1); + } + break; + default: /* 8 < n <= 15 + k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1`=[nmlkjihg] k2`=[po------] */ + { + const int lshift = n*8-64, rshift = 64-lshift; + for( ; ptr < end ; ptr+=16) { + uint64_t c = k2 >> rshift; + k2 = READ_UINT64(ptr, 0); + c |= k2 << lshift; + doblock128x64(h1, h2, k1, c); + k1 = k2 >> rshift; + k2 = READ_UINT64(ptr, 1); + k1 |= k2 << lshift; + } + } + } +#endif /*UNALIGNED_SAFE*/ + + /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */ + len -= len & ~15; + + /* Append any remaining bytes into carry */ + dobytes128x64(len, h1, h2, k1, k2, n, ptr, len); + + /* Copy out new running hash and carry */ + ph[0] = h1; + ph[1] = h2; + pcarry[0] = k1; + pcarry[1] = (k2 & ~0xff) | n; +} + +/*---------------------------------------------------------------------------*/ + +/* All in one go */ + +/* MurmurHash3_x64_128 api */ +void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out) +{ + uint64_t carry[2] = 
{0, 0}; + uint64_t h[2] = {seed, seed}; + PMurHash128x64_Process(h, carry, key, len); + PMurHash128x64_Result(h, carry, (uint32_t) len, (uint64_t *) out); +} |