summaryrefslogtreecommitdiff
path: root/hv.h
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2012-11-24 19:21:38 +0100
committerYves Orton <demerphq@gmail.com>2012-11-24 19:41:11 +0100
commit4886dc4f5813d46e391c2336d87c89a3f270b7f9 (patch)
tree42d55a5f86a8ec350382e15e0767020207859197 /hv.h
parentd0761305e645847e893799c475b2a24d15afbcd0 (diff)
downloadperl-4886dc4f5813d46e391c2336d87c89a3f270b7f9.tar.gz
Add "buzzhash16" - a random hash function
This is just a toy. Probably not worth using in production. But interesting enough I thought I would include it. The idea is to use the hash seed as a table of random 16 bit integers whose values are what we hash depending on the character we read. It is pretty fast, I have no idea how secure it is. It will probably work really badly if the seed is crap. YMMV.
Diffstat (limited to 'hv.h')
-rw-r--r--hv.h57
1 files changed, 55 insertions, 2 deletions
diff --git a/hv.h b/hv.h
index 3f899e5362..26ad8a9858 100644
--- a/hv.h
+++ b/hv.h
@@ -129,6 +129,7 @@ struct xpvhv {
#define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED)
#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0])
#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1])
+#define PERL_HASH_SEED_U16_x(idx) (((U16*)PERL_HASH_SEED)[idx])
/* legacy - only mod_perl should be doing this. */
#ifdef PERL_HASH_INTERNAL_ACCESS
@@ -142,13 +143,65 @@ struct xpvhv {
#define PERL_HASH_FUNC_MURMUR3
#define PERL_HASH_FUNC_SIPHASH
#define PERL_HASH_FUNC_ONE_AT_A_TIME
+#define PERL_HASH_FUNC_BUZZHASH16
*/
-#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME))
+#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) \
+ || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) || defined(PERL_HASH_FUNC_BUZZHASH16))
#define PERL_HASH_FUNC_MURMUR3
#endif
-#if defined(PERL_HASH_FUNC_SIPHASH)
+#if defined(PERL_HASH_FUNC_BUZZHASH16)
+/* "BUZZHASH16"
+ *
+ * I whacked this together while just playing around.
+ *
+ * The idea is that instead of hashing the actual string input we use the
+ * bytes of the string as an index into a table of randomly generated
+ * 16 bit values.
+ *
+ * A left rotate is used to "mix" in previous bits as we go, and I borrowed
+ * the avalanche function from one-at-a-time for the final step. A lookup
+ * into the table based on the lower 8 bits of the length combined with
+ * the length itself is used as an itializer.
+ *
+ * The resulting hash value has no actual bits fed in from the string so
+ * I would guess it is pretty secure, although I am not a cryptographer
+ * and have no idea for sure. Nor has it been rigorously tested. On the
+ * other hand it is reasonably fast, and seems to produce reasonable
+ * distributions.
+ *
+ * Yves Orton
+ */
+
+
+#define PERL_HASH_FUNC "BUZZHASH16"
+#define PERL_HASH_SEED_BYTES 512 /* 2 bytes per octet value, 2 * 256 */
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+ #include <stdlib.h> /* Microsoft put _rotl declaration in here */
+ #define BUZZHASH_ROTL32(x,r) _rotl(x,r)
+#else
+ /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+ #define BUZZHASH_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r)))
+#endif
+
+#define PERL_HASH(hash,str,len) \
+ STMT_START { \
+ register const char * const s_PeRlHaSh_tmp = (str); \
+ register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register const unsigned char *end_PeRlHaSh = (const unsigned char *)s_PeRlHaSh + len; \
+ register U32 hash_PeRlHaSh = (PERL_HASH_SEED_U16_x(len & 0xff) << 16) + len; \
+ while (s_PeRlHaSh < end_PeRlHaSh) { \
+ hash_PeRlHaSh ^= PERL_HASH_SEED_U16_x((U8)*s_PeRlHaSh++); \
+ hash_PeRlHaSh += BUZZHASH_ROTL32(hash_PeRlHaSh,11); \
+ } \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
+ (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
+ } STMT_END
+
+#elif defined(PERL_HASH_FUNC_SIPHASH)
#define PERL_HASH_FUNC "SIPHASH"
#define PERL_HASH_SEED_BYTES 16