diff options
-rw-r--r-- | Cross/Makefile-cross-SH | 2 | ||||
-rw-r--r-- | MANIFEST | 1 | ||||
-rwxr-xr-x | Makefile.SH | 2 | ||||
-rw-r--r-- | Makefile.micro | 2 | ||||
-rw-r--r-- | NetWare/Makefile | 1 | ||||
-rwxr-xr-x | configpm | 2 | ||||
-rw-r--r-- | hv.h | 574 | ||||
-rw-r--r-- | hv_func.h | 509 | ||||
-rw-r--r-- | win32/Makefile | 1 | ||||
-rw-r--r-- | win32/Makefile.ce | 1 |
10 files changed, 519 insertions, 576 deletions
diff --git a/Cross/Makefile-cross-SH b/Cross/Makefile-cross-SH index 18c881d103..e5a14b490d 100644 --- a/Cross/Makefile-cross-SH +++ b/Cross/Makefile-cross-SH @@ -341,7 +341,7 @@ unidatafiles = lib/unicore/Decomposition.pl lib/unicore/TestProp.pl \ unidatadirs = lib/unicore/To lib/unicore/lib h1 = EXTERN.h INTERN.h XSUB.h av.h xconfig.h cop.h cv.h dosish.h -h2 = embed.h form.h gv.h handy.h hv.h keywords.h mg.h op.h opcode.h +h2 = embed.h form.h gv.h handy.h hv.h hv_func.h keywords.h mg.h op.h opcode.h h3 = pad.h patchlevel.h perl.h perlapi.h perly.h pp.h proto.h regcomp.h h4 = regexp.h scope.h sv.h unixish.h util.h iperlsys.h thread.h h5 = utf8.h warnings.h @@ -4240,6 +4240,7 @@ hints/utekv.sh Hints for named architecture hints/uwin.sh Hints for named architecture hints/vos.sh Hints for named architecture hv.c Hash value code +hv_func.h Hash value static inline function header hv.h Hash value header inline.h Static inline functions inline_invlist.c Inline functions for handling inversion lists diff --git a/Makefile.SH b/Makefile.SH index ab4e4ca191..e5ba3b958a 100755 --- a/Makefile.SH +++ b/Makefile.SH @@ -465,7 +465,7 @@ unidatafiles = lib/unicore/Decomposition.pl lib/unicore/TestProp.pl \ unidatadirs = lib/unicore/To lib/unicore/lib h1 = EXTERN.h INTERN.h XSUB.h av.h $(CONFIGH) cop.h cv.h dosish.h -h2 = embed.h form.h gv.h handy.h hv.h keywords.h mg.h op.h opcode.h +h2 = embed.h form.h gv.h handy.h hv.h hv_func.h keywords.h mg.h op.h opcode.h h3 = pad.h patchlevel.h perl.h perlapi.h perly.h pp.h proto.h regcomp.h h4 = regexp.h scope.h sv.h unixish.h util.h iperlsys.h thread.h h5 = utf8.h warnings.h mydtrace.h op_reg_common.h l1_char_class_tab.h diff --git a/Makefile.micro b/Makefile.micro index 8ce48b4d71..cd9a9c5833 100644 --- a/Makefile.micro +++ b/Makefile.micro @@ -29,7 +29,7 @@ microperl: $(O) generated_headers = uuudmap.h ubitcount.h umg_data.h H = av.h uconfig.h cop.h cv.h embed.h embedvar.h form.h gv.h handy.h \ - hv.h intrpvar.h iperlsys.h mg.h op.h opcode.h opnames.h pad.h \ + hv.h hv_func.h intrpvar.h iperlsys.h mg.h op.h opcode.h opnames.h pad.h \ patchlevel.h perl.h perlsdio.h perlvars.h perly.h pp.h \ pp_proto.h proto.h reentr.h regexp.h scope.h sv.h \ thread.h unixish.h utf8.h util.h warnings.h $(generated_headers) diff --git a/NetWare/Makefile b/NetWare/Makefile index 3e0ada31e9..afd0e36a61 100644 --- a/NetWare/Makefile +++ b/NetWare/Makefile @@ -756,6 +756,7 @@ CORE_NOCFG_H = \ ..\gv.h \ ..\handy.h \ ..\hv.h \ + ..\hv_func.h \ ..\iperlsys.h \ ..\mg.h \ ..\nostdio.h \ @@ -75,7 +75,7 @@ my %Extensions = map {($_,$_)} # This is the list from MM_VMS, plus pad.h, parser.h, perlsfio.h utf8.h # which it installs. It *doesn't* install perliol.h - FIXME. my @header_files = qw(EXTERN.h INTERN.h XSUB.h av.h config.h cop.h cv.h - embed.h embedvar.h form.h gv.h handy.h hv.h intrpvar.h + embed.h embedvar.h form.h gv.h handy.h hv.h hv_func.h intrpvar.h iperlsys.h keywords.h mg.h nostdio.h op.h opcode.h pad.h parser.h patchlevel.h perl.h perlio.h perlsdio.h perlsfio.h perlvars.h perly.h pp.h pp_proto.h proto.h @@ -103,578 +103,6 @@ struct xpvhv { STRLEN xhv_max; /* subscript of last element of xhv_array */ }; -/* hash a key */ -/* The use of a temporary pointer and the casting games - * is needed to serve the dual purposes of - * (a) the hashed data being interpreted as "unsigned char" (new since 5.8, - * a "char" can be either signed or unsigned, depending on the compiler) - * (b) catering for old code that uses a "char" - * - * The "hash seed" feature was added in Perl 5.8.1 to perturb the results - * to avoid "algorithmic complexity attacks". - * - * If USE_HASH_SEED is defined, hash randomisation is done by default - * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done - * only if the environment variable PERL_HASH_SEED is set. - * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed()) - */ -#ifndef PERL_HASH_SEED -# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) -# define PERL_HASH_SEED PL_hash_seed -# else -# define PERL_HASH_SEED "PeRlHaShhAcKpErl" -# endif -#endif - -#define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED) -#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0]) -#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1]) -#define PERL_HASH_SEED_U16_x(idx) (((U16*)PERL_HASH_SEED)[idx]) - -/* legacy - only mod_perl should be doing this. */ -#ifdef PERL_HASH_INTERNAL_ACCESS -#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) -#endif - -/* Uncomment one of the following lines to use an alternative hash algorithm. -#define PERL_HASH_FUNC_SDBM -#define PERL_HASH_FUNC_DJB2 -#define PERL_HASH_FUNC_SUPERFAST -#define PERL_HASH_FUNC_MURMUR3 -#define PERL_HASH_FUNC_SIPHASH -#define PERL_HASH_FUNC_ONE_AT_A_TIME -#define PERL_HASH_FUNC_ONE_AT_A_TIME_OLD -#define PERL_HASH_FUNC_BUZZHASH16 -*/ - -#if !( 0 \ - || defined(PERL_HASH_FUNC_SDBM) \ - || defined(PERL_HASH_FUNC_DJB2) \ - || defined(PERL_HASH_FUNC_SUPERFAST) \ - || defined(PERL_HASH_FUNC_MURMUR3) \ - || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \ - || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \ - || defined(PERL_HASH_FUNC_BUZZHASH16) \ - ) -#ifdef U64 -#define PERL_HASH_FUNC_SIPHASH -#else -#define PERL_HASH_FUNC_ONE_AT_A_TIME -#endif -#endif - -#if defined(PERL_HASH_FUNC_BUZZHASH16) -/* "BUZZHASH16" - * - * I whacked this together while just playing around. - * - * The idea is that instead of hashing the actual string input we use the - * bytes of the string as an index into a table of randomly generated - * 16 bit values. - * - * A left rotate is used to "mix" in previous bits as we go, and I borrowed - * the avalanche function from one-at-a-time for the final step. A lookup - * into the table based on the lower 8 bits of the length combined with - * the length itself is used as an itializer. - * - * The resulting hash value has no actual bits fed in from the string so - * I would guess it is pretty secure, although I am not a cryptographer - * and have no idea for sure. Nor has it been rigorously tested. On the - * other hand it is reasonably fast, and seems to produce reasonable - * distributions. - * - * Yves Orton - */ - - -#define PERL_HASH_FUNC "BUZZHASH16" -#define PERL_HASH_SEED_BYTES 512 /* 2 bytes per octet value, 2 * 256 */ -/* Find best way to ROTL32 */ -#if defined(_MSC_VER) - #include <stdlib.h> /* Microsoft put _rotl declaration in here */ - #define BUZZHASH_ROTL32(x,r) _rotl(x,r) -#else - /* gcc recognises this code and generates a rotate instruction for CPUs with one */ - #define BUZZHASH_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) -#endif - -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - const char * const s_PeRlHaSh_tmp = (str); \ - const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - const unsigned char *end_PeRlHaSh = (const unsigned char *)s_PeRlHaSh + len; \ - U32 hash_PeRlHaSh = (PERL_HASH_SEED_U16_x(len & 0xff) << 16) + len; \ - while (s_PeRlHaSh < end_PeRlHaSh) { \ - hash_PeRlHaSh ^= PERL_HASH_SEED_U16_x((U8)*s_PeRlHaSh++); \ - hash_PeRlHaSh += BUZZHASH_ROTL32(hash_PeRlHaSh,11); \ - } \ - hash_PeRlHaSh += (hash_PeRlHaSh << 3); \ - hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ - (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ - } STMT_END - -#elif defined(PERL_HASH_FUNC_SIPHASH) -#define PERL_HASH_FUNC "SIPHASH" -#define PERL_HASH_SEED_BYTES 16 - -/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. - * The authors claim it is relatively secure compared to the alternatives - * and that performance wise it is a suitable hash for languages like Perl. - * See: - * - * https://www.131002.net/siphash/ - * - * This implementation seems to perform slightly slower than one-at-a-time for - * short keys, but degrades slower for longer keys. Murmur Hash outperforms it - * regardless of keys size. - * - * It is 64 bit only. - */ - -#define PERL_HASH_NEEDS_TWO_SEEDS - -#ifndef U64 -#define U64 uint64_t -#endif - -#define ROTL(x,b) (U64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) - -#define U32TO8_LE(p, v) \ - (p)[0] = (U8)((v) ); (p)[1] = (U8)((v) >> 8); \ - (p)[2] = (U8)((v) >> 16); (p)[3] = (U8)((v) >> 24); - -#define U64TO8_LE(p, v) \ - U32TO8_LE((p), (U32)((v) )); \ - U32TO8_LE((p) + 4, (U32)((v) >> 32)); - -#define U8TO64_LE(p) \ - (((U64)((p)[0]) ) | \ - ((U64)((p)[1]) << 8) | \ - ((U64)((p)[2]) << 16) | \ - ((U64)((p)[3]) << 24) | \ - ((U64)((p)[4]) << 32) | \ - ((U64)((p)[5]) << 40) | \ - ((U64)((p)[6]) << 48) | \ - ((U64)((p)[7]) << 56)) - -#define SIPROUND \ - do { \ - v0_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,13); v1_PeRlHaSh ^= v0_PeRlHaSh; v0_PeRlHaSh=ROTL(v0_PeRlHaSh,32); \ - v2_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,16); v3_PeRlHaSh ^= v2_PeRlHaSh; \ - v0_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,21); v3_PeRlHaSh ^= v0_PeRlHaSh; \ - v2_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,17); v1_PeRlHaSh ^= v2_PeRlHaSh; v2_PeRlHaSh=ROTL(v2_PeRlHaSh,32); \ - } while(0) - -/* SipHash-2-4 */ -#define PERL_HASH(hash,str,len) STMT_START { \ - const char * const strtmp_PeRlHaSh = (str); \ - const unsigned char *in_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ - const U32 inlen_PeRlHaSh = (len); \ - /* "somepseudorandomlygeneratedbytes" */ \ - U64 v0_PeRlHaSh = 0x736f6d6570736575ULL; \ - U64 v1_PeRlHaSh = 0x646f72616e646f6dULL; \ - U64 v2_PeRlHaSh = 0x6c7967656e657261ULL; \ - U64 v3_PeRlHaSh = 0x7465646279746573ULL; \ -\ - U64 b_PeRlHaSh; \ - U64 k0_PeRlHaSh = PERL_HASH_SEED_U64_1; \ - U64 k1_PeRlHaSh = PERL_HASH_SEED_U64_2; \ - U64 m_PeRlHaSh; \ - const int left_PeRlHaSh = inlen_PeRlHaSh & 7; \ - const U8 *end_PeRlHaSh = in_PeRlHaSh + inlen_PeRlHaSh - left_PeRlHaSh; \ -\ - b_PeRlHaSh = ( ( U64 )(len) ) << 56; \ - v3_PeRlHaSh ^= k1_PeRlHaSh; \ - v2_PeRlHaSh ^= k0_PeRlHaSh; \ - v1_PeRlHaSh ^= k1_PeRlHaSh; \ - v0_PeRlHaSh ^= k0_PeRlHaSh; \ -\ - for ( ; in_PeRlHaSh != end_PeRlHaSh; in_PeRlHaSh += 8 ) \ - { \ - m_PeRlHaSh = U8TO64_LE( in_PeRlHaSh ); \ - v3_PeRlHaSh ^= m_PeRlHaSh; \ - SIPROUND; \ - SIPROUND; \ - v0_PeRlHaSh ^= m_PeRlHaSh; \ - } \ -\ - switch( left_PeRlHaSh ) \ - { \ - case 7: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 6] ) << 48; \ - case 6: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 5] ) << 40; \ - case 5: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 4] ) << 32; \ - case 4: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 3] ) << 24; \ - case 3: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 2] ) << 16; \ - case 2: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 1] ) << 8; \ - case 1: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 0] ); break; \ - case 0: break; \ - } \ -\ - v3_PeRlHaSh ^= b_PeRlHaSh; \ - SIPROUND; \ - SIPROUND; \ - v0_PeRlHaSh ^= b_PeRlHaSh; \ -\ - v2_PeRlHaSh ^= 0xff; \ - SIPROUND; \ - SIPROUND; \ - SIPROUND; \ - SIPROUND; \ - b_PeRlHaSh = v0_PeRlHaSh ^ v1_PeRlHaSh ^ v2_PeRlHaSh ^ v3_PeRlHaSh; \ - (hash)= (U32)(b_PeRlHaSh & U32_MAX); \ -} STMT_END - -#elif defined(PERL_HASH_FUNC_SUPERFAST) -#define PERL_HASH_FUNC "SUPERFAST" -#define PERL_HASH_SEED_BYTES 4 -/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in - * (http://burtleburtle.net/bob/hash/doobs.html) - * It is by Paul Hsieh (c) 2004 and is analysed here - * http://www.azillionmonkeys.com/qed/hash.html - * license terms are here: - * http://www.azillionmonkeys.com/qed/weblicense.html - */ -#undef get16bits -#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ - || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) -#define get16bits(d) (*((const U16 *) (d))) -#endif - -#if !defined (get16bits) -#define get16bits(d) ((((const U8 *)(d))[1] << UINT32_C(8))\ - +((const U8 *)(d))[0]) -#endif -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - const char * const strtmp_PeRlHaSh = (str); \ - const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ - U32 len_PeRlHaSh = (len); \ - U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - U32 tmp_PeRlHaSh; \ - int rem_PeRlHaSh= len_PeRlHaSh & 3; \ - len_PeRlHaSh >>= 2; \ - \ - for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \ - hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ - tmp_PeRlHaSh = (get16bits (str_PeRlHaSh+2) << 11) ^ hash_PeRlHaSh; \ - hash_PeRlHaSh = (hash_PeRlHaSh << 16) ^ tmp_PeRlHaSh; \ - str_PeRlHaSh += 2 * sizeof (U16); \ - hash_PeRlHaSh += hash_PeRlHaSh >> 11; \ - } \ - \ - /* Handle end cases */ \ - switch (rem_PeRlHaSh) { \ - case 3: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 16; \ - hash_PeRlHaSh ^= str_PeRlHaSh[sizeof (U16)] << 18; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 11; \ - break; \ - case 2: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 11; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 17; \ - break; \ - case 1: hash_PeRlHaSh += *str_PeRlHaSh; \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 10; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 1; \ - } \ - \ - /* Force "avalanching" of final 127 bits */ \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 3; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 5; \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 4; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 17; \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \ - (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \ - } STMT_END - -#elif defined(PERL_HASH_FUNC_MURMUR3) -#define PERL_HASH_FUNC "MURMUR3" -#define PERL_HASH_SEED_BYTES 4 - -/*----------------------------------------------------------------------------- - * MurmurHash3 was written by Austin Appleby, and is placed in the public - * domain. - * - * This implementation was originally written by Shane Day, and is also public domain, - * and was modified to function as a macro similar to other perl hash functions by - * Yves Orton. - * - * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) - * with support for progressive processing. - * - * If you want to understand the MurmurHash algorithm you would be much better - * off reading the original source. Just point your browser at: - * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp - * - * How does it work? - * - * We can only process entire 32 bit chunks of input, except for the very end - * that may be shorter. - * - * To handle endianess I simply use a macro that reads a U32 and define - * that macro to be a direct read on little endian machines, a read and swap - * on big endian machines, or a byte-by-byte read if the endianess is unknown. - */ - - -/*----------------------------------------------------------------------------- - * Endianess, misalignment capabilities and util macros - * - * The following 3 macros are defined in this section. The other macros defined - * are only needed to help derive these 3. - * - * MURMUR_READ_UINT32(x) Read a little endian unsigned 32-bit int - * MURMUR_UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries - * MURMUR_ROTL32(x,r) Rotate x left by r bits - */ - -/* Now find best way we can to READ_UINT32 */ -#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4 - /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ - #define MURMUR_READ_UINT32(ptr) (*((U32*)(ptr))) -#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 - /* TODO: Add additional cases below where a compiler provided bswap32 is available */ - #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) - #define MURMUR_READ_UINT32(ptr) (__builtin_bswap32(*((U32*)(ptr)))) - #else - /* Without a known fast bswap32 we're just as well off doing this */ - #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) - #define MURMUR_UNALIGNED_SAFE - #endif -#else - /* Unknown endianess so last resort is to read individual bytes */ - #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) - - /* Since we're not doing word-reads we can skip the messing about with realignment */ - #define MURMUR_UNALIGNED_SAFE -#endif - -/* Find best way to ROTL32 */ -#if defined(_MSC_VER) - #include <stdlib.h> /* Microsoft put _rotl declaration in here */ - #define MURMUR_ROTL32(x,r) _rotl(x,r) -#else - /* gcc recognises this code and generates a rotate instruction for CPUs with one */ - #define MURMUR_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) -#endif - - -/*----------------------------------------------------------------------------- - * Core murmurhash algorithm macros */ - -#define MURMUR_C1 (0xcc9e2d51) -#define MURMUR_C2 (0x1b873593) -#define MURMUR_C3 (0xe6546b64) -#define MURMUR_C4 (0x85ebca6b) -#define MURMUR_C5 (0xc2b2ae35) - -/* This is the main processing body of the algorithm. It operates - * on each full 32-bits of input. */ -#define MURMUR_DOBLOCK(h1, k1) STMT_START { \ - k1 *= MURMUR_C1; \ - k1 = MURMUR_ROTL32(k1,15); \ - k1 *= MURMUR_C2; \ - \ - h1 ^= k1; \ - h1 = MURMUR_ROTL32(h1,13); \ - h1 = h1 * 5 + MURMUR_C3; \ -} STMT_END - - -/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ -/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ -#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \ - int MURMUR_DOBYTES_i = cnt; \ - while(MURMUR_DOBYTES_i--) { \ - c = c>>8 | *ptr++<<24; \ - n++; len--; \ - if(n==4) { \ - MURMUR_DOBLOCK(h1, c); \ - n = 0; \ - } \ - } \ -} STMT_END - -/* process the last 1..3 bytes and finalize */ -#define MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length) STMT_START { \ - /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */\ - PeRlHaSh_len -= PeRlHaSh_len/4*4; \ - \ - /* Append any remaining bytes into carry */ \ - MURMUR_DOBYTES(PeRlHaSh_len, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len); \ - \ - if (PeRlHaSh_bytes_in_carry) { \ - PeRlHaSh_k1 = PeRlHaSh_carry >> ( 4 - PeRlHaSh_bytes_in_carry ) * 8; \ - PeRlHaSh_k1 *= MURMUR_C1; \ - PeRlHaSh_k1 = MURMUR_ROTL32(PeRlHaSh_k1,15); \ - PeRlHaSh_k1 *= MURMUR_C2; \ - PeRlHaSh_h1 ^= PeRlHaSh_k1; \ - } \ - PeRlHaSh_h1 ^= PeRlHaSh_total_length; \ - \ - /* fmix */ \ - PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \ - PeRlHaSh_h1 *= MURMUR_C4; \ - PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 13; \ - PeRlHaSh_h1 *= MURMUR_C5; \ - PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \ - (hash)= PeRlHaSh_h1; \ -} STMT_END - -/* now we create the hash function */ - -#if defined(UNALIGNED_SAFE) -#define PERL_HASH(hash,str,len) STMT_START { \ - const char * const s_PeRlHaSh_tmp = (str); \ - const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ - I32 PeRlHaSh_len = len; \ - \ - U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ - U32 PeRlHaSh_k1; \ - U32 PeRlHaSh_carry = 0; \ - \ - const unsigned char *PeRlHaSh_end; \ - \ - int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \ - I32 PeRlHaSh_total_length= PeRlHaSh_len; \ - \ - /* This CPU handles unaligned word access */ \ - /* Process 32-bit chunks */ \ - PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - \ - MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\ - } STMT_END -#else -#define PERL_HASH(hash,str,len) STMT_START { \ - const char * const s_PeRlHaSh_tmp = (str); \ - const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ - I32 PeRlHaSh_len = len; \ - \ - U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ - U32 PeRlHaSh_k1; \ - U32 PeRlHaSh_carry = 0; \ - \ - const unsigned char *PeRlHaSh_end; \ - \ - int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \ - I32 PeRlHaSh_total_length= PeRlHaSh_len; \ - \ - /* This CPU does not handle unaligned word access */ \ - \ - /* Consume enough so that the next data byte is word aligned */ \ - int PeRlHaSh_i = -(long)PeRlHaSh_ptr & 3; \ - if(PeRlHaSh_i && PeRlHaSh_i <= PeRlHaSh_len) { \ - MURMUR_DOBYTES(PeRlHaSh_i, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len);\ - } \ - \ - /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ \ - PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \ - switch(PeRlHaSh_bytes_in_carry) { /* how many bytes in carry */ \ - case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - break; \ - case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = PeRlHaSh_carry>>24; \ - PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - PeRlHaSh_k1 |= PeRlHaSh_carry<<8; \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - break; \ - case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = PeRlHaSh_carry>>16; \ - PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - PeRlHaSh_k1 |= PeRlHaSh_carry<<16; \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - break; \ - case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = PeRlHaSh_carry>>8; \ - PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - PeRlHaSh_k1 |= PeRlHaSh_carry<<24; \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - } \ - \ - MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\ - } STMT_END -#endif - -#elif defined(PERL_HASH_FUNC_DJB2) -#define PERL_HASH_FUNC "DJB2" -#define PERL_HASH_SEED_BYTES 4 -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - const char * const s_PeRlHaSh_tmp = (str); \ - const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - I32 i_PeRlHaSh = len; \ - U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - while (i_PeRlHaSh--) { \ - hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \ - } \ - (hash) = hash_PeRlHaSh;\ - } STMT_END - -#elif defined(PERL_HASH_FUNC_SDBM) -#define PERL_HASH_FUNC "SDBM" -#define PERL_HASH_SEED_BYTES 4 -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - const char * const s_PeRlHaSh_tmp = (str); \ - const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - I32 i_PeRlHaSh = len; \ - U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - while (i_PeRlHaSh--) { \ - hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \ - } \ - (hash) = hash_PeRlHaSh;\ - } STMT_END - -#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) - -#define PERL_HASH_SEED_BYTES 4 - -#ifdef PERL_HASH_FUNC_ONE_AT_A_TIME -/* new version, add the length to the seed so that adding characters changes the "seed" being used. */ -#define PERL_HASH_FUNC "ONE_AT_A_TIME" -#define MIX_SEED_AND_LEN(seed,len) (seed + len) -#else -/* old version, just use the seed. - not recommended */ -#define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD" -#define MIX_SEED_AND_LEN(seed,len) (seed) -#endif - -/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins - * from requirements by Colin Plumb. - * (http://burtleburtle.net/bob/hash/doobs.html) */ -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - const char * const s_PeRlHaSh_tmp = (str); \ - const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - const unsigned char *end_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp + (len); \ - U32 hash_PeRlHaSh = MIX_SEED_AND_LEN(PERL_HASH_SEED_U32, len); \ - while (s_PeRlHaSh < end_PeRlHaSh) { \ - hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \ - hash_PeRlHaSh += (hash_PeRlHaSh << 10); \ - hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \ - } \ - hash_PeRlHaSh += (hash_PeRlHaSh << 3); \ - hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ - (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ - } STMT_END -#endif -#ifndef PERL_HASH -#error "No hash function defined!" -#endif /* =head1 Hash Manipulation Functions @@ -1131,6 +559,8 @@ Creates a new HV. The reference count is set to 1. #define newHV() MUTABLE_HV(newSV_type(SVt_PVHV)) +#include "hv_func.h" + /* * Local variables: * c-indentation-style: bsd diff --git a/hv_func.h b/hv_func.h new file mode 100644 index 0000000000..fdb4ad8b41 --- /dev/null +++ b/hv_func.h @@ -0,0 +1,509 @@ +/* hash a key + *-------------------------------------------------------------------------------------- + * The "hash seed" feature was added in Perl 5.8.1 to perturb the results + * to avoid "algorithmic complexity attacks". + * + * If USE_HASH_SEED is defined, hash randomisation is done by default + * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done + * only if the environment variable PERL_HASH_SEED is set. + * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed()) + */ + +#ifndef PERL_SEEN_HV_FUNC_H /* compile once */ +#define PERL_SEEN_HV_FUNC_H + +#if !( 0 \ + || defined(PERL_HASH_FUNC_SDBM) \ + || defined(PERL_HASH_FUNC_DJB2) \ + || defined(PERL_HASH_FUNC_SUPERFAST) \ + || defined(PERL_HASH_FUNC_MURMUR3) \ + || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \ + || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \ + ) +#ifdef HAS_QUAD +#define PERL_HASH_FUNC_SIPHASH +#else +#define PERL_HASH_FUNC_ONE_AT_A_TIME +#endif +#endif + +#if defined(PERL_HASH_FUNC_SIPHASH) +# define PERL_HASH_FUNC "SIPHASH_2_4" +# define PERL_HASH_SEED_BYTES 16 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_siphash_2_4(PERL_HASH_SEED,(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_SUPERFAST) +# define PERL_HASH_FUNC "SUPERFAST" +# define PERL_HASH_SEED_BYTES 4 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_superfast(PERL_HASH_SEED,(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_MURMUR3) +# define PERL_HASH_FUNC "MURMUR3" +# define PERL_HASH_SEED_BYTES 4 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_murmur3(PERL_HASH_SEED,(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_DJB2) +# define PERL_HASH_FUNC "DJB2" +# define PERL_HASH_SEED_BYTES 4 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_djb2(PERL_HASH_SEED,(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_SDBM) +# define PERL_HASH_FUNC "SDBM" +# define PERL_HASH_SEED_BYTES 4 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_sdbm(PERL_HASH_SEED,(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) +# define PERL_HASH_FUNC "ONE_AT_A_TIME" +# define PERL_HASH_SEED_BYTES 4 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_one_at_a_time(PERL_HASH_SEED,(U8*)(str),(len)) +#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) +# define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD" +# define PERL_HASH_SEED_BYTES 4 +# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_old_one_at_a_time(PERL_HASH_SEED,(U8*)(str),(len)) +#endif + +#ifndef PERL_HASH +#error "No hash function defined!" +#endif +#ifndef PERL_HASH_SEED_BYTES +#error "PERL_HASH_SEED_BYTES not defined" +#endif +#ifndef PERL_HASH_FUNC +#error "PERL_HASH_FUNC not defined" +#endif + +#ifndef PERL_HASH_SEED +# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) +# define PERL_HASH_SEED PL_hash_seed +# elif PERL_HASH_SEED_BYTES == 4 +# define PERL_HASH_SEED "PeRl" +# elif PERL_HASH_SEED_BYTES == 16 +# define PERL_HASH_SEED "PeRlHaShhAcKpErl" +# else +# error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC +# endif +#endif + +/*----------------------------------------------------------------------------- + * Endianess, misalignment capabilities and util macros + * + * The following 3 macros are defined in this section. The other macros defined + * are only needed to help derive these 3. + * + * U8TO32_LE(x) Read a little endian unsigned 32-bit int + * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries + * ROTL32(x,r) Rotate x left by r bits + */ + +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define U8TO16_LE(d) (*((const U16 *) (d))) +#endif + +#if !defined (U8TO16_LE) +#define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\ + +((const U8 *)(d))[0]) +#endif + + +/* Now find best way we can to READ_UINT32 */ +#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4 + /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ + #define U8TO32_LE(ptr) (*((U32*)(ptr))) +#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 + /* TODO: Add additional cases below where a compiler provided bswap32 is available */ + #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) + #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr)))) + #else + /* Without a known fast bswap32 we're just as well off doing this */ + #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + #define UNALIGNED_SAFE + #endif +#else + /* Unknown endianess so last resort is to read individual bytes */ + #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + /* Since we're not doing word-reads we can skip the messing about with realignment */ + #define UNALIGNED_SAFE +#endif + +/* Find best way to ROTL32 */ +#if defined(_MSC_VER) + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ + #define ROTL32(x,r) _rotl(x,r) +#else + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) +#endif + + +/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. + * The authors claim it is relatively secure compared to the alternatives + * and that performance wise it is a suitable hash for languages like Perl. + * See: + * + * https://www.131002.net/siphash/ + * + * This implementation seems to perform slightly slower than one-at-a-time for + * short keys, but degrades slower for longer keys. Murmur Hash outperforms it + * regardless of keys size. + * + * It is 64 bit only. + */ + +#ifdef HAS_QUAD + +#ifndef U64TYPE +/* This probably isn't going to work, but failing with a compiler error due to + lack of uint64_t is no worse than failing right now with an #error. */ +#define U64TYPE uint64_t +#endif + + +#define ROTL64(x,b) (U64TYPE)( ((x) << (b)) | ( (x) >> (64 - (b))) ) + +#define U8TO64_LE(p) \ + (((U64TYPE)((p)[0]) ) | \ + ((U64TYPE)((p)[1]) << 8) | \ + ((U64TYPE)((p)[2]) << 16) | \ + ((U64TYPE)((p)[3]) << 24) | \ + ((U64TYPE)((p)[4]) << 32) | \ + ((U64TYPE)((p)[5]) << 40) | \ + ((U64TYPE)((p)[6]) << 48) | \ + ((U64TYPE)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \ + v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \ + v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \ + v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \ + } while(0) + +/* SipHash-2-4 */ + +PERL_STATIC_INLINE U32 +S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { + /* "somepseudorandomlygeneratedbytes" */ + U64TYPE v0 = 0x736f6d6570736575ULL; + U64TYPE v1 = 0x646f72616e646f6dULL; + U64TYPE v2 = 0x6c7967656e657261ULL; + U64TYPE v3 = 0x7465646279746573ULL; + + U64TYPE b; + U64TYPE k0 = ((U64TYPE*)seed)[0]; + U64TYPE k1 = ((U64TYPE*)seed)[1]; + U64TYPE m; + const int left = inlen & 7; + const U8 *end = in + inlen - left; + + b = ( ( U64TYPE )(inlen) ) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for ( ; in != end; in += 8 ) + { + m = U8TO64_LE( in ); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } + + switch( left ) + { + case 7: b |= ( ( U64TYPE )in[ 6] ) << 48; + case 6: b |= ( ( U64TYPE )in[ 5] ) << 40; + case 5: b |= ( ( U64TYPE )in[ 4] ) << 32; + case 4: b |= ( ( U64TYPE )in[ 3] ) << 24; + case 3: b |= ( ( U64TYPE )in[ 2] ) << 16; + case 2: b |= ( ( U64TYPE )in[ 1] ) << 8; + case 1: b |= ( ( U64TYPE )in[ 0] ); break; + case 0: break; + } + + v3 ^= b; + SIPROUND; + SIPROUND; + v0 ^= b; + + v2 ^= 0xff; + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + b = v0 ^ v1 ^ v2 ^ v3; + return (U32)(b & U32_MAX); +} +#endif /* defined(HAS_QUAD) */ + +/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in + * (http://burtleburtle.net/bob/hash/doobs.html) + * It is by Paul Hsieh (c) 2004 and is analysed here + * http://www.azillionmonkeys.com/qed/hash.html + * license terms are here: + * http://www.azillionmonkeys.com/qed/weblicense.html + */ + + +PERL_STATIC_INLINE U32 +S_perl_hash_superfast(const unsigned char * const seed, const unsigned char *str, STRLEN len) { + U32 hash = *((U32*)seed) + len; + U32 tmp; + int rem= len & 3; + len >>= 2; + + for (;len > 0; len--) { + hash += U8TO16_LE (str); + tmp = (U8TO16_LE (str+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + str += 2 * sizeof (U16); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { \ + case 3: hash += U8TO16_LE (str); + hash ^= hash << 16; + hash ^= str[sizeof (U16)] << 18; + hash += hash >> 11; + break; + case 2: hash += U8TO16_LE (str); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *str; + hash ^= hash << 10; + hash += hash >> 1; + } + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + return (hash + (hash >> 6)); +} + + +/*----------------------------------------------------------------------------- + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. + * + * This implementation was originally written by Shane Day, and is also public domain, + * and was modified to function as a macro similar to other perl hash functions by + * Yves Orton. + * + * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) + * with support for progressive processing. + * + * If you want to understand the MurmurHash algorithm you would be much better + * off reading the original source. Just point your browser at: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + * + * How does it work? + * + * We can only process entire 32 bit chunks of input, except for the very end + * that may be shorter. + * + * To handle endianess I simply use a macro that reads a U32 and define + * that macro to be a direct read on little endian machines, a read and swap + * on big endian machines, or a byte-by-byte read if the endianess is unknown. + */ + + +/*----------------------------------------------------------------------------- + * Core murmurhash algorithm macros */ + +#define MURMUR_C1 (0xcc9e2d51) +#define MURMUR_C2 (0x1b873593) +#define MURMUR_C3 (0xe6546b64) +#define MURMUR_C4 (0x85ebca6b) +#define MURMUR_C5 (0xc2b2ae35) + +/* This is the main processing body of the algorithm. It operates + * on each full 32-bits of input. */ +#define MURMUR_DOBLOCK(h1, k1) STMT_START { \ + k1 *= MURMUR_C1; \ + k1 = ROTL32(k1,15); \ + k1 *= MURMUR_C2; \ + \ + h1 ^= k1; \ + h1 = ROTL32(h1,13); \ + h1 = h1 * 5 + MURMUR_C3; \ +} STMT_END + + +/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ +/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ +#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \ + int MURMUR_DOBYTES_i = cnt; \ + while(MURMUR_DOBYTES_i--) { \ + c = c>>8 | *ptr++<<24; \ + n++; len--; \ + if(n==4) { \ + MURMUR_DOBLOCK(h1, c); \ + n = 0; \ + } \ + } \ +} STMT_END + + +/* now we create the hash function */ +PERL_STATIC_INLINE U32 +S_perl_hash_murmur3(const unsigned char * const seed, const unsigned char *ptr, STRLEN len) { + U32 h1 = *((U32*)seed); + U32 k1; + U32 carry = 0; + + const unsigned char *end; + int bytes_in_carry = 0; /* bytes in carry */ + I32 total_length= len; + +#if defined(UNALIGNED_SAFE) + /* Handle carry: commented out as its only used in incremental mode - it never fires for us + int i = (4-n) & 3; + if(i && i <= len) { + MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len); + } + */ + + /* This CPU handles unaligned word access */ + /* Process 32-bit chunks */ + end = ptr + len/4*4; + for( ; ptr < end ; ptr+=4) { + k1 = U8TO32_LE(ptr); + MURMUR_DOBLOCK(h1, k1); + } +#else + /* This CPU does not handle unaligned word access */ + + /* Consume enough so that the next data byte is word aligned */ + int i = -(long)ptr & 3; + if(i && i <= len) { + MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len); + } + + /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ + end = ptr + len/4*4; + switch(bytes_in_carry) { /* how many bytes in carry */ + case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ + for( ; ptr < end ; ptr+=4) { + k1 = U8TO32_LE(ptr); + MURMUR_DOBLOCK(h1, k1); + } + break; + case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ + for( ; ptr < end ; ptr+=4) { + k1 = carry>>24; + carry = U8TO32_LE(ptr); + k1 |= carry<<8; + MURMUR_DOBLOCK(h1, k1); + } + break; + case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ + for( ; ptr < end ; ptr+=4) { + k1 = carry>>16; + carry = U8TO32_LE(ptr); + k1 |= carry<<16; + MURMUR_DOBLOCK(h1, k1); + } + break; + case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ + for( ; ptr < end ; ptr+=4) { + k1 = carry>>8; + carry = U8TO32_LE(ptr); + k1 |= carry<<24; + MURMUR_DOBLOCK(h1, k1); + } + } +#endif + /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ + len -= len/4*4; + + /* Append any remaining bytes into carry */ + MURMUR_DOBYTES(len, h1, carry, bytes_in_carry, ptr, len); + + if (bytes_in_carry) { + k1 = carry >> ( 4 - bytes_in_carry ) * 8; + k1 *= MURMUR_C1; + k1 = ROTL32(k1,15); + k1 *= MURMUR_C2; + h1 ^= k1; + } + h1 ^= total_length; + + /* fmix */ + h1 ^= h1 >> 16; + h1 *= MURMUR_C4; + h1 ^= h1 >> 13; + h1 *= MURMUR_C5; + h1 ^= h1 >> 16; + return h1; +} + + +PERL_STATIC_INLINE U32 +S_perl_hash_djb2(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { + const unsigned char * const end = (const unsigned char *)str + len; + U32 hash = *((U32*)seed + len); + while (str < end) { + hash = ((hash << 5) + hash) + *str++; + } + return hash; +} + +PERL_STATIC_INLINE U32 +S_perl_hash_sdbm(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { + const unsigned char * const end = (const unsigned char *)str + len; + U32 hash = *((U32*)seed + len); + while (str < end) { + hash = (hash << 6) + (hash << 16) - hash + *str++; + } + return hash; +} + + +/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins + * from requirements by Colin Plumb. + * (http://burtleburtle.net/bob/hash/doobs.html) */ +PERL_STATIC_INLINE U32 +S_perl_hash_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { + const unsigned char * const end = (const unsigned char *)str + len; + U32 hash = *((U32*)seed) + len; + while (str < end) { + hash += *str++; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + return (hash + (hash << 15)); +} + +PERL_STATIC_INLINE U32 +S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { + const unsigned char * const end = (const unsigned char *)str + len; + U32 hash = *((U32*)seed); + while (str < end) { + hash += *str++; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + return (hash + (hash << 15)); +} + +/* legacy - only mod_perl should be doing this. */ +#ifdef PERL_HASH_INTERNAL_ACCESS +#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) +#endif + +#endif /*compile once*/ + +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * + * ex: set ts=8 sts=4 sw=4 et: + */ diff --git a/win32/Makefile b/win32/Makefile index 5c0be56933..012d87fe59 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -712,6 +712,7 @@ CORE_NOCFG_H = \ ..\gv.h \ ..\handy.h \ ..\hv.h \ + ..\hv_func.h \ ..\iperlsys.h \ ..\mg.h \ ..\nostdio.h \ diff --git a/win32/Makefile.ce b/win32/Makefile.ce index 9013eb86c9..b94c47a666 100644 --- a/win32/Makefile.ce +++ b/win32/Makefile.ce @@ -652,6 +652,7 @@ CORE_NOCFG_H = \ ..\gv.h \ ..\handy.h \ ..\hv.h \ + ..\hv_func.h \ ..\iperlsys.h \ ..\mg.h \ ..\nostdio.h \ |