diff options
Diffstat (limited to 'girepository/cmph/jenkins_hash.c')
-rw-r--r-- | girepository/cmph/jenkins_hash.c | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/girepository/cmph/jenkins_hash.c b/girepository/cmph/jenkins_hash.c new file mode 100644 index 00000000..f5233a5a --- /dev/null +++ b/girepository/cmph/jenkins_hash.c @@ -0,0 +1,297 @@ +#include "jenkins_hash.h" +#include <stdlib.h> +#ifdef WIN32 +#define _USE_MATH_DEFINES //For M_LOG2E +#endif +#include <math.h> +#include <limits.h> +#include <string.h> + +//#define DEBUG +#include "debug.h" + +#define hashsize(n) ((cmph_uint32)1<<(n)) +#define hashmask(n) (hashsize(n)-1) + + + +//#define NM2 /* Define this if you do not want power of 2 table sizes*/ + + +/* + -------------------------------------------------------------------- + mix -- mix 3 32-bit values reversibly. + For every delta with one or two bits set, and the deltas of all three + high bits or all three low bits, whether the original value of a,b,c + is almost all zero or is uniformly distributed, + * If mix() is run forward or backward, at least 32 bits in a,b,c + have at least 1/4 probability of changing. + * If mix() is run forward, every bit of c will change between 1/3 and + 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) + mix() was built out of 36 single-cycle latency instructions in a + structure that could supported 2x parallelism, like so: + a -= b; + a -= c; x = (c>>13); + b -= c; a ^= x; + b -= a; x = (a<<8); + c -= a; b ^= x; + c -= b; x = (b>>13); + ... + Unfortunately, superscalar Pentiums and Sparcs can't take advantage + of that parallelism. They've also turned some of those single-cycle + latency instructions into multi-cycle latency instructions. Still, + this is the fastest good hash I could find. There were about 2^^68 + to choose from. I only looked at a billion or so. + -------------------------------------------------------------------- + */ +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* + -------------------------------------------------------------------- + hash() -- hash a variable-length key into a 32-bit value +k : the key (the unaligned variable-length array of bytes) +len : the length of the key, counting by bytes +initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Every 1-bit and 2-bit delta achieves avalanche. +About 6*len+35 instructions. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do +h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (cmph_uint8 **)k, do it like this: +for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); + +By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this +code any way you wish, private, educational, or commercial. It's free. + +See http://burtleburtle.net/bob/hash/evahash.html +Use for hash table lookup, or anything where one collision in 2^^32 is +acceptable. Do NOT use for cryptographic purposes. +-------------------------------------------------------------------- + */ +jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table +{ + jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); + DEBUGP("Initializing jenkins hash\n"); + state->seed = ((cmph_uint32)rand() % size); + return state; +} +void jenkins_state_destroy(jenkins_state_t *state) +{ + free(state); +} + + +inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) +{ + register cmph_uint32 len, length; + + /* Set up the internal state */ + length = keylen; + len = length; + hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + hashes[2] = seed; /* the previous hash value - seed in our case */ + + /*---------------------------------------- handle most of the key */ + while (len >= 12) + { + hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); + hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24)); + hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24)); + mix(hashes[0],hashes[1],hashes[2]); + k += 12; len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + hashes[2] += length; + switch(len) /* all the case statements fall through */ + { + case 11: + hashes[2] +=((cmph_uint32)k[10]<<24); + case 10: + hashes[2] +=((cmph_uint32)k[9]<<16); + case 9 : + hashes[2] +=((cmph_uint32)k[8]<<8); + /* the first byte of hashes[2] is reserved for the length */ + case 8 : + hashes[1] +=((cmph_uint32)k[7]<<24); + case 7 : + hashes[1] +=((cmph_uint32)k[6]<<16); + case 6 : + hashes[1] +=((cmph_uint32)k[5]<<8); + case 5 : + hashes[1] +=(cmph_uint8) k[4]; + case 4 : + hashes[0] +=((cmph_uint32)k[3]<<24); + case 3 : + hashes[0] +=((cmph_uint32)k[2]<<16); + case 2 : + hashes[0] +=((cmph_uint32)k[1]<<8); + case 1 : + hashes[0] +=(cmph_uint8)k[0]; + /* case 0: nothing left to add */ + } + + mix(hashes[0],hashes[1],hashes[2]); +} + +cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen) +{ + cmph_uint32 hashes[3]; + __jenkins_hash_vector(state->seed, k, keylen, hashes); + return hashes[2]; +/* cmph_uint32 a, b, c; + cmph_uint32 len, length; + + // Set up the internal state + length = keylen; + len = length; + a = b = 0x9e3779b9; // the golden ratio; an arbitrary value + c = state->seed; // the previous hash value - seed in our case + + // handle most of the key + while (len >= 12) + { + a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); + b += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24)); + c += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24)); + mix(a,b,c); + k += 12; len -= 12; + } + + // handle the last 11 bytes + c += length; + switch(len) /// all the case statements fall through + { + case 11: + c +=((cmph_uint32)k[10]<<24); + case 10: + c +=((cmph_uint32)k[9]<<16); + case 9 : + c +=((cmph_uint32)k[8]<<8); + // the first byte of c is reserved for the length + case 8 : + b +=((cmph_uint32)k[7]<<24); + case 7 : + b +=((cmph_uint32)k[6]<<16); + case 6 : + b +=((cmph_uint32)k[5]<<8); + case 5 : + b +=k[4]; + case 4 : + a +=((cmph_uint32)k[3]<<24); + case 3 : + a +=((cmph_uint32)k[2]<<16); + case 2 : + a +=((cmph_uint32)k[1]<<8); + case 1 : + a +=k[0]; + // case 0: nothing left to add + } + + mix(a,b,c); + + /// report the result + + return c; + */ +} + +void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) +{ + __jenkins_hash_vector(state->seed, k, keylen, hashes); +} + +void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen) +{ + *buflen = sizeof(cmph_uint32); + *buf = (char *)malloc(sizeof(cmph_uint32)); + if (!*buf) + { + *buflen = UINT_MAX; + return; + } + memcpy(*buf, &(state->seed), sizeof(cmph_uint32)); + DEBUGP("Dumped jenkins state with seed %u\n", state->seed); + return; +} + +jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state) +{ + jenkins_state_t *dest_state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); + dest_state->hashfunc = src_state->hashfunc; + dest_state->seed = src_state->seed; + return dest_state; +} + +jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen) +{ + jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); + state->seed = *(cmph_uint32 *)buf; + state->hashfunc = CMPH_HASH_JENKINS; + DEBUGP("Loaded jenkins state with seed %u\n", state->seed); + return state; +} + + +/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed); + * \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed. + * \param state points to the jenkins function + * \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() + */ +void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed) +{ + if (state && jenkins_packed) + { + memcpy(jenkins_packed, &(state->seed), sizeof(cmph_uint32)); + } +} + +/** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state); + * \brief Return the amount of space needed to pack a jenkins function. + * \return the size of the packed function or zero for failures + */ +cmph_uint32 jenkins_state_packed_size() +{ + return sizeof(cmph_uint32); +} + + +/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen); + * \param jenkins_packed is a pointer to a contiguous memory area + * \param key is a pointer to a key + * \param keylen is the key length + * \return an integer that represents a hash value of 32 bits. + */ +cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen) +{ + cmph_uint32 hashes[3]; + __jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes); + return hashes[2]; +} + +/** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); + * \param jenkins_packed is a pointer to a contiguous memory area + * \param key is a pointer to a key + * \param keylen is the key length + * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. + */ +void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) +{ + __jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes); +} |