diff options
author | Yves Orton <demerphq@gmail.com> | 2012-11-24 19:21:38 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2012-11-24 19:41:11 +0100 |
commit | 4886dc4f5813d46e391c2336d87c89a3f270b7f9 (patch) | |
tree | 42d55a5f86a8ec350382e15e0767020207859197 /hv.h | |
parent | d0761305e645847e893799c475b2a24d15afbcd0 (diff) | |
download | perl-4886dc4f5813d46e391c2336d87c89a3f270b7f9.tar.gz |
Add "buzzhash16" - a random hash function
This is just a toy. Probably not worth using in production. But
interesting enough I thought I would include it.
The idea is to use the hash seed as a table of random 16 bit integers
whose values are what we hash depending on the character we read.
It is pretty fast, I have no idea how secure it is. It will probably
work really badly if the seed is crap. YMMV.
Diffstat (limited to 'hv.h')
-rw-r--r-- | hv.h | 57 |
1 files changed, 55 insertions, 2 deletions
@@ -129,6 +129,7 @@ struct xpvhv { #define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED) #define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0]) #define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1]) +#define PERL_HASH_SEED_U16_x(idx) (((U16*)PERL_HASH_SEED)[idx]) /* legacy - only mod_perl should be doing this. */ #ifdef PERL_HASH_INTERNAL_ACCESS @@ -142,13 +143,65 @@ struct xpvhv { #define PERL_HASH_FUNC_MURMUR3 #define PERL_HASH_FUNC_SIPHASH #define PERL_HASH_FUNC_ONE_AT_A_TIME +#define PERL_HASH_FUNC_BUZZHASH16 */ -#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME)) +#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) \ + || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) || defined(PERL_HASH_FUNC_BUZZHASH16)) #define PERL_HASH_FUNC_MURMUR3 #endif -#if defined(PERL_HASH_FUNC_SIPHASH) +#if defined(PERL_HASH_FUNC_BUZZHASH16) +/* "BUZZHASH16" + * + * I whacked this together while just playing around. + * + * The idea is that instead of hashing the actual string input we use the + * bytes of the string as an index into a table of randomly generated + * 16 bit values. + * + * A left rotate is used to "mix" in previous bits as we go, and I borrowed + * the avalanche function from one-at-a-time for the final step. A lookup + * into the table based on the lower 8 bits of the length combined with + * the length itself is used as an itializer. + * + * The resulting hash value has no actual bits fed in from the string so + * I would guess it is pretty secure, although I am not a cryptographer + * and have no idea for sure. Nor has it been rigorously tested. On the + * other hand it is reasonably fast, and seems to produce reasonable + * distributions. + * + * Yves Orton + */ + + +#define PERL_HASH_FUNC "BUZZHASH16" +#define PERL_HASH_SEED_BYTES 512 /* 2 bytes per octet value, 2 * 256 */ +/* Find best way to ROTL32 */ +#if defined(_MSC_VER) + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ + #define BUZZHASH_ROTL32(x,r) _rotl(x,r) +#else + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define BUZZHASH_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) +#endif + +#define PERL_HASH(hash,str,len) \ + STMT_START { \ + register const char * const s_PeRlHaSh_tmp = (str); \ + register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + register const unsigned char *end_PeRlHaSh = (const unsigned char *)s_PeRlHaSh + len; \ + register U32 hash_PeRlHaSh = (PERL_HASH_SEED_U16_x(len & 0xff) << 16) + len; \ + while (s_PeRlHaSh < end_PeRlHaSh) { \ + hash_PeRlHaSh ^= PERL_HASH_SEED_U16_x((U8)*s_PeRlHaSh++); \ + hash_PeRlHaSh += BUZZHASH_ROTL32(hash_PeRlHaSh,11); \ + } \ + hash_PeRlHaSh += (hash_PeRlHaSh << 3); \ + hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ + (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ + } STMT_END + +#elif defined(PERL_HASH_FUNC_SIPHASH) #define PERL_HASH_FUNC "SIPHASH" #define PERL_HASH_SEED_BYTES 16 |