summaryrefslogtreecommitdiff
path: root/hv_func.h
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2016-12-06 23:48:52 +0100
committerYves Orton <demerphq@gmail.com>2016-12-07 00:04:30 +0100
commit6b0260474df579e9412f57249519747ab8bb5c2b (patch)
tree320cbf33855b5de9da39a7f96bc0c5a3f5031e7d /hv_func.h
parent86f791091e14932363af3abc0e78844b973b86b5 (diff)
downloadperl-6b0260474df579e9412f57249519747ab8bb5c2b.tar.gz
use a hybrid hash function, OAATH for short keys, Siphash 1-3 for longer ones
Switch to an optimized/unrolled variant of OAATH for keys 16 bytes and less, and use Siphash 1-3 for longer keys. (On 64 bit builds, 32 bit is untouched.) I've done a bunch of benchmarking with Porting/bench.pl to see at what point the 8 byte reads of Siphash 1-3 start to win over the low overhead costs of OAATH. It seems to be around 16 bytes. At the same time, we unroll OAATH so that we can save some instructions per character. The net result is all key lengths get faster as measured by Porting/bench.pl, and hashing longer keys becomes *much* faster. Interestingly the actual crossover point of OAATH being slower than Siphash 1-3 is much higher than might be guessed from bulk testing either in a raw benchmark. For instance, basic benchmarks of the hash function alone predicts that Siphash 1-3 should "win" at around 4 bytes. However, in practice it is four times longer. I believe this is because setting up Siphash blows away a fair amount of CPU state compared to OAATH, which is more important when the hash is going to be used to manage a data structure. So it requires a correspondingly longer string before the larger sized read starts to win. Summarized stats (higher is better): AVERAGE blead yves ------ ------ Ir 100.00 104.47 Dr 100.00 103.97 Dw 100.00 107.63 COND 100.00 112.16 IND 100.00 75.07 COND_m 100.00 125.75 IND_m 100.00 97.42 Ir_m1 100.00 137.11 Dr_m1 100.00 100.10 Dw_m1 100.00 99.62 Ir_mm 100.00 100.00 Dr_mm 100.00 100.00 Dw_mm 100.00 100.00
Diffstat (limited to 'hv_func.h')
-rw-r--r--hv_func.h240
1 files changed, 184 insertions, 56 deletions
diff --git a/hv_func.h b/hv_func.h
index 57b1ed1375..8e0329f3c8 100644
--- a/hv_func.h
+++ b/hv_func.h
@@ -14,6 +14,8 @@
#if !( 0 \
|| defined(PERL_HASH_FUNC_SIPHASH) \
+ || defined(PERL_HASH_FUNC_SIPHASH13) \
+ || defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) \
|| defined(PERL_HASH_FUNC_SDBM) \
|| defined(PERL_HASH_FUNC_DJB2) \
|| defined(PERL_HASH_FUNC_SUPERFAST) \
@@ -24,13 +26,25 @@
|| defined(PERL_HASH_FUNC_MURMUR_HASH_64A) \
|| defined(PERL_HASH_FUNC_MURMUR_HASH_64B) \
)
+#ifdef HAS_QUAD
+#define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13
+#else
#define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD
#endif
+#endif
#if defined(PERL_HASH_FUNC_SIPHASH)
# define PERL_HASH_FUNC "SIPHASH_2_4"
# define PERL_HASH_SEED_BYTES 16
# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len))
+#elif defined(PERL_HASH_FUNC_SIPHASH13)
+# define PERL_HASH_FUNC "SIPHASH_1_3"
+# define PERL_HASH_SEED_BYTES 16
+# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_1_3((seed),(U8*)(str),(len))
+#elif defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13)
+# define PERL_HASH_FUNC "HYBRID_OAATHU_SIPHASH_1_3"
+# define PERL_HASH_SEED_BYTES 24
+# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_oaathu_siphash_1_3((seed),(U8*)(str),(len))
#elif defined(PERL_HASH_FUNC_SUPERFAST)
# define PERL_HASH_FUNC "SUPERFAST"
# define PERL_HASH_SEED_BYTES 4
@@ -192,72 +206,89 @@
((U64)((p)[7]) << 56))
#define SIPROUND \
- do { \
+ STMT_START { \
v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \
v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \
v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
- } while(0)
+ } STMT_END
/* SipHash-2-4 */
-PERL_STATIC_INLINE U32
-S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) {
- /* "somepseudorandomlygeneratedbytes" */
- U64 v0 = UINT64_C(0x736f6d6570736575);
- U64 v1 = UINT64_C(0x646f72616e646f6d);
- U64 v2 = UINT64_C(0x6c7967656e657261);
- U64 v3 = UINT64_C(0x7465646279746573);
-
- U64 b;
- U64 k0 = ((const U64*)seed)[0];
- U64 k1 = ((const U64*)seed)[1];
- U64 m;
- const int left = inlen & 7;
- const U8 *end = in + inlen - left;
-
- b = ( ( U64 )(inlen) ) << 56;
- v3 ^= k1;
- v2 ^= k0;
- v1 ^= k1;
- v0 ^= k0;
-
- for ( ; in != end; in += 8 )
- {
- m = U8TO64_LE( in );
- v3 ^= m;
- SIPROUND;
- SIPROUND;
- v0 ^= m;
- }
-
- switch( left )
- {
- case 7: b |= ( ( U64 )in[ 6] ) << 48;
- case 6: b |= ( ( U64 )in[ 5] ) << 40;
- case 5: b |= ( ( U64 )in[ 4] ) << 32;
- case 4: b |= ( ( U64 )in[ 3] ) << 24;
- case 3: b |= ( ( U64 )in[ 2] ) << 16;
- case 2: b |= ( ( U64 )in[ 1] ) << 8;
- case 1: b |= ( ( U64 )in[ 0] ); break;
- case 0: break;
- }
-
- v3 ^= b;
- SIPROUND;
- SIPROUND;
- v0 ^= b;
-
- v2 ^= 0xff;
- SIPROUND;
- SIPROUND;
- SIPROUND;
- SIPROUND;
- b = v0 ^ v1 ^ v2 ^ v3;
- return (U32)(b & U32_MAX);
+
+#define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \
+PERL_STATIC_INLINE U32 \
+FNC(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { \
+ /* "somepseudorandomlygeneratedbytes" */ \
+ U64 v0 = UINT64_C(0x736f6d6570736575); \
+ U64 v1 = UINT64_C(0x646f72616e646f6d); \
+ U64 v2 = UINT64_C(0x6c7967656e657261); \
+ U64 v3 = UINT64_C(0x7465646279746573); \
+ \
+ U64 b; \
+ U64 k0 = ((const U64*)seed)[0]; \
+ U64 k1 = ((const U64*)seed)[1]; \
+ U64 m; \
+ const int left = inlen & 7; \
+ const U8 *end = in + inlen - left; \
+ \
+ b = ( ( U64 )(inlen) ) << 56; \
+ v3 ^= k1; \
+ v2 ^= k0; \
+ v1 ^= k1; \
+ v0 ^= k0; \
+ \
+ for ( ; in != end; in += 8 ) \
+ { \
+ m = U8TO64_LE( in ); \
+ v3 ^= m; \
+ \
+ SIP_ROUNDS; \
+ \
+ v0 ^= m; \
+ } \
+ \
+ switch( left ) \
+ { \
+ case 7: b |= ( ( U64 )in[ 6] ) << 48; \
+ case 6: b |= ( ( U64 )in[ 5] ) << 40; \
+ case 5: b |= ( ( U64 )in[ 4] ) << 32; \
+ case 4: b |= ( ( U64 )in[ 3] ) << 24; \
+ case 3: b |= ( ( U64 )in[ 2] ) << 16; \
+ case 2: b |= ( ( U64 )in[ 1] ) << 8; \
+ case 1: b |= ( ( U64 )in[ 0] ); break; \
+ case 0: break; \
+ } \
+ \
+ v3 ^= b; \
+ \
+ SIP_ROUNDS; \
+ \
+ v0 ^= b; \
+ \
+ v2 ^= 0xff; \
+ \
+ SIP_FINAL_ROUNDS \
+ \
+ b = v0 ^ v1 ^ v2 ^ v3; \
+ return (U32)(b & U32_MAX); \
}
+
+PERL_SIPHASH_FNC(
+ S_perl_hash_siphash_1_3
+ ,SIPROUND;
+ ,SIPROUND;SIPROUND;SIPROUND;
+)
+
+PERL_SIPHASH_FNC(
+ S_perl_hash_siphash_2_4
+ ,SIPROUND;SIPROUND;
+ ,SIPROUND;SIPROUND;SIPROUND;SIPROUND;
+)
+
#endif /* defined(HAS_QUAD) */
+
/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
* (http://burtleburtle.net/bob/hash/doobs.html)
* It is by Paul Hsieh (c) 2004 and is analysed here
@@ -550,6 +581,102 @@ S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned
return (hash + (hash << 15));
}
+#ifdef HAS_QUAD
+/* For short strings, 16 bytes or shorter, we use an optimised variant
+ * of One At A Time Hard, and for longer strings, we use siphash_1_3.
+ */
+PERL_STATIC_INLINE U32
+S_perl_hash_oaathu_siphash_1_3(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
+ U32 hash = *((const U32*)seed) + (U32)len;
+ switch (len) {
+ case 16:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[15];
+ case 15:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[14];
+ case 14:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[13];
+ case 13:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[12];
+ case 12:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[11];
+ case 11:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[10];
+ case 10:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[9];
+ case 9:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[8];
+ case 8:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[7];
+ case 7:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[6];
+ case 6:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[5];
+ case 5:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[4];
+ case 4:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[3];
+ case 3:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[2];
+ case 2:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[1];
+ case 1:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += str[0];
+ case 0:
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += seed[4];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += seed[5];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += seed[6];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ hash += seed[7];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ return (hash + (hash << 15));
+ }
+ return S_perl_hash_siphash_1_3(seed+8, str, len);
+}
+#endif /* defined(HAS_QUAD) */
+
PERL_STATIC_INLINE U32
S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
const unsigned char * const end = (const unsigned char *)str + len;
@@ -692,6 +819,7 @@ S_perl_hash_murmur_hash_64b (const unsigned char * const seed, const unsigned ch
}
#endif
+
/* legacy - only mod_perl should be doing this. */
#ifdef PERL_HASH_INTERNAL_ACCESS
#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)