1 files changed, 297 insertions, 0 deletions
diff --git a/girepository/cmph/jenkins_hash.c b/girepository/cmph/jenkins_hash.c
new file mode 100644
index 00000000..f5233a5a
--- /dev/null
+++ b/girepository/cmph/jenkins_hash.c
@@ -0,0 +1,297 @@
+#include "jenkins_hash.h"
+#include <stdlib.h>
+#ifdef WIN32
+#define _USE_MATH_DEFINES //For M_LOG2E
+#endif
+#include <math.h>
+#include <limits.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+#define hashsize(n) ((cmph_uint32)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+
+
+//#define NM2 /* Define this if you do not want power of 2 table sizes*/
+
+
+/*
+   --------------------------------------------------------------------
+   mix -- mix 3 32-bit values reversibly.
+   For every delta with one or two bits set, and the deltas of all three
+   high bits or all three low bits, whether the original value of a,b,c
+   is almost all zero or is uniformly distributed,
+ * If mix() is run forward or backward, at least 32 bits in a,b,c
+ have at least 1/4 probability of changing.
+ * If mix() is run forward, every bit of c will change between 1/3 and
+ 2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ mix() was built out of 36 single-cycle latency instructions in a 
+ structure that could supported 2x parallelism, like so:
+ a -= b; 
+ a -= c; x = (c>>13);
+ b -= c; a ^= x;
+ b -= a; x = (a<<8);
+ c -= a; b ^= x;
+ c -= b; x = (b>>13);
+ ...
+ Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+ of that parallelism.  They've also turned some of those single-cycle
+ latency instructions into multi-cycle latency instructions.  Still,
+ this is the fastest good hash I could find.  There were about 2^^68
+ to choose from.  I only looked at a billion or so.
+ --------------------------------------------------------------------
+ */
+#define mix(a,b,c) \
+{ \
+	a -= b; a -= c; a ^= (c>>13); \
+	b -= c; b -= a; b ^= (a<<8); \
+	c -= a; c -= b; c ^= (b>>13); \
+	a -= b; a -= c; a ^= (c>>12);  \
+	b -= c; b -= a; b ^= (a<<16); \
+	c -= a; c -= b; c ^= (b>>5); \
+	a -= b; a -= c; a ^= (c>>3);  \
+	b -= c; b -= a; b ^= (a<<10); \
+	c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+   --------------------------------------------------------------------
+   hash() -- hash a variable-length key into a 32-bit value
+k       : the key (the unaligned variable-length array of bytes)
+len     : the length of the key, counting by bytes
+initval : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+About 6*len+35 instructions.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (cmph_uint8 **)k, do it like this:
+for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial.  It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+ */
+jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
+{
+	jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+	DEBUGP("Initializing jenkins hash\n");
+	state->seed = ((cmph_uint32)rand() % size);
+	return state;
+}
+void jenkins_state_destroy(jenkins_state_t *state)
+{
+	free(state);
+}
+
+
+inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+	register cmph_uint32 len, length;
+
+	/* Set up the internal state */
+	length = keylen;
+	len = length;
+	hashes[0] = hashes[1] = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+	hashes[2] = seed;   /* the previous hash value - seed in our case */
+
+	/*---------------------------------------- handle most of the key */
+	while (len >= 12)
+	{
+		hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
+		hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
+		hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
+		mix(hashes[0],hashes[1],hashes[2]);
+		k += 12; len -= 12;
+	}
+
+	/*------------------------------------- handle the last 11 bytes */
+	hashes[2]  += length;
+	switch(len)              /* all the case statements fall through */
+	{
+		case 11: 
+			hashes[2] +=((cmph_uint32)k[10]<<24);
+		case 10: 
+			hashes[2] +=((cmph_uint32)k[9]<<16);
+		case 9 : 
+			hashes[2] +=((cmph_uint32)k[8]<<8);
+			/* the first byte of hashes[2] is reserved for the length */
+		case 8 : 
+			hashes[1] +=((cmph_uint32)k[7]<<24);
+		case 7 : 
+			hashes[1] +=((cmph_uint32)k[6]<<16);
+		case 6 : 
+			hashes[1] +=((cmph_uint32)k[5]<<8);
+		case 5 :
+			hashes[1] +=(cmph_uint8) k[4];
+		case 4 : 
+			hashes[0] +=((cmph_uint32)k[3]<<24);
+		case 3 : 
+			hashes[0] +=((cmph_uint32)k[2]<<16);
+		case 2 : 
+			hashes[0] +=((cmph_uint32)k[1]<<8);
+		case 1 : 
+			hashes[0] +=(cmph_uint8)k[0];
+			/* case 0: nothing left to add */
+	}
+
+	mix(hashes[0],hashes[1],hashes[2]);
+}
+
+cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
+{
+	cmph_uint32 hashes[3];
+	__jenkins_hash_vector(state->seed, k, keylen, hashes);
+	return hashes[2];
+/*	cmph_uint32 a, b, c;
+	cmph_uint32 len, length;
+
+	// Set up the internal state 
+	length = keylen;
+	len = length;
+	a = b = 0x9e3779b9;  // the golden ratio; an arbitrary value 
+	c = state->seed;   // the previous hash value - seed in our case 
+
+	// handle most of the key 
+	while (len >= 12)
+	{
+		a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
+		b += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
+		c += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
+		mix(a,b,c);
+		k += 12; len -= 12;
+	}
+
+	// handle the last 11 bytes
+	c  += length;
+	switch(len)              /// all the case statements fall through 
+	{
+		case 11: 
+			c +=((cmph_uint32)k[10]<<24);
+		case 10: 
+			c +=((cmph_uint32)k[9]<<16);
+		case 9 : 
+			c +=((cmph_uint32)k[8]<<8);
+			// the first byte of c is reserved for the length 
+		case 8 : 
+			b +=((cmph_uint32)k[7]<<24);
+		case 7 : 
+			b +=((cmph_uint32)k[6]<<16);
+		case 6 : 
+			b +=((cmph_uint32)k[5]<<8);
+		case 5 : 
+			b +=k[4];
+		case 4 : 
+			a +=((cmph_uint32)k[3]<<24);
+		case 3 : 
+			a +=((cmph_uint32)k[2]<<16);
+		case 2 : 
+			a +=((cmph_uint32)k[1]<<8);
+		case 1 : 
+			a +=k[0];
+		// case 0: nothing left to add 
+	}
+
+	mix(a,b,c);
+
+	/// report the result 
+
+	return c;
+	*/
+}
+
+void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+	__jenkins_hash_vector(state->seed, k, keylen, hashes);
+}
+
+void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
+{
+	*buflen = sizeof(cmph_uint32);
+	*buf = (char *)malloc(sizeof(cmph_uint32));
+	if (!*buf) 
+	{
+		*buflen = UINT_MAX;
+		return;
+	}
+	memcpy(*buf, &(state->seed), sizeof(cmph_uint32));
+	DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
+	return;
+}
+
+jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state)
+{
+	jenkins_state_t *dest_state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+	dest_state->hashfunc = src_state->hashfunc;
+	dest_state->seed = src_state->seed;
+	return dest_state;
+}
+
+jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
+{
+	jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+	state->seed = *(cmph_uint32 *)buf;
+	state->hashfunc = CMPH_HASH_JENKINS;
+	DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
+	return state;
+}
+
+
+/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
+ *  \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
+ *  \param state points to the jenkins function
+ *  \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() 
+ */
+void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
+{
+	if (state && jenkins_packed)
+	{
+		memcpy(jenkins_packed, &(state->seed), sizeof(cmph_uint32));
+	}
+}
+
+/** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state);
+ *  \brief Return the amount of space needed to pack a jenkins function.
+ *  \return the size of the packed function or zero for failures
+ */ 
+cmph_uint32 jenkins_state_packed_size()
+{
+	return sizeof(cmph_uint32);
+}
+
+
+/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
+ *  \param jenkins_packed is a pointer to a contiguous memory area
+ *  \param key is a pointer to a key
+ *  \param keylen is the key length
+ *  \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
+{
+	cmph_uint32 hashes[3];
+	__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
+	return hashes[2];
+}
+
+/** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+ *  \param jenkins_packed is a pointer to a contiguous memory area
+ *  \param key is a pointer to a key
+ *  \param keylen is the key length
+ *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+	__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
+}