summaryrefslogtreecommitdiff
path: root/girepository/cmph
diff options
context:
space:
mode:
authorColin Walters <walters@verbum.org>2010-11-11 15:01:07 -0500
committerColin Walters <walters@verbum.org>2010-12-03 16:03:31 -0500
commit3a94a5e36287072486831eb68bfe43a0e1c8ea78 (patch)
tree59eb83ba02a9acfd7032c2199d6e523e89efe418 /girepository/cmph
parentf6e5defff5a8d8ba83e0b414496d28b5e2361f39 (diff)
downloadgobject-introspection-3a94a5e36287072486831eb68bfe43a0e1c8ea78.tar.gz
Import CMPH 1.0
This will be used for typelib indexing. See README-CMPH-IMPORT.txt for more information.
Diffstat (limited to 'girepository/cmph')
-rw-r--r--girepository/cmph/README-CMPH-IMPORT.txt5
-rwxr-xr-xgirepository/cmph/bdz.c703
-rwxr-xr-xgirepository/cmph/bdz.h43
-rwxr-xr-xgirepository/cmph/bdz_gen_lookup_table.c33
-rwxr-xr-xgirepository/cmph/bdz_ph.c621
-rwxr-xr-xgirepository/cmph/bdz_ph.h42
-rwxr-xr-xgirepository/cmph/bdz_structs.h36
-rwxr-xr-xgirepository/cmph/bdz_structs_ph.h26
-rw-r--r--girepository/cmph/bitbool.h179
-rw-r--r--girepository/cmph/bmz.c620
-rw-r--r--girepository/cmph/bmz.h42
-rw-r--r--girepository/cmph/bmz8.c632
-rw-r--r--girepository/cmph/bmz8.h42
-rw-r--r--girepository/cmph/bmz8_structs.h25
-rw-r--r--girepository/cmph/bmz_structs.h25
-rwxr-xr-xgirepository/cmph/brz.c985
-rw-r--r--girepository/cmph/brz.h47
-rwxr-xr-xgirepository/cmph/brz_structs.h39
-rw-r--r--girepository/cmph/buffer_entry.c103
-rw-r--r--girepository/cmph/buffer_entry.h14
-rw-r--r--girepository/cmph/buffer_manage.c66
-rw-r--r--girepository/cmph/buffer_manage.h12
-rw-r--r--girepository/cmph/buffer_manager.c64
-rw-r--r--girepository/cmph/buffer_manager.h12
-rw-r--r--girepository/cmph/chd.c271
-rw-r--r--girepository/cmph/chd.h59
-rw-r--r--girepository/cmph/chd_ph.c988
-rw-r--r--girepository/cmph/chd_ph.h59
-rw-r--r--girepository/cmph/chd_structs.h21
-rw-r--r--girepository/cmph/chd_structs_ph.h29
-rw-r--r--girepository/cmph/chm.c381
-rw-r--r--girepository/cmph/chm.h42
-rw-r--r--girepository/cmph/chm_structs.h24
-rw-r--r--girepository/cmph/cmph.c845
-rw-r--r--girepository/cmph/cmph.h112
-rw-r--r--girepository/cmph/cmph_structs.c69
-rw-r--r--girepository/cmph/cmph_structs.h33
-rw-r--r--girepository/cmph/cmph_time.h62
-rw-r--r--girepository/cmph/cmph_types.h42
-rw-r--r--girepository/cmph/compressed_rank.c321
-rw-r--r--girepository/cmph/compressed_rank.h55
-rw-r--r--girepository/cmph/compressed_seq.c378
-rw-r--r--girepository/cmph/compressed_seq.h84
-rw-r--r--girepository/cmph/debug.h53
-rw-r--r--girepository/cmph/djb2_hash.c49
-rw-r--r--girepository/cmph/djb2_hash.h18
-rw-r--r--girepository/cmph/fch.c517
-rw-r--r--girepository/cmph/fch.h48
-rw-r--r--girepository/cmph/fch_buckets.c214
-rw-r--r--girepository/cmph/fch_buckets.h30
-rwxr-xr-xgirepository/cmph/fch_structs.h30
-rw-r--r--girepository/cmph/fnv_hash.c53
-rw-r--r--girepository/cmph/fnv_hash.h18
-rw-r--r--girepository/cmph/graph.c338
-rw-r--r--girepository/cmph/graph.h40
-rw-r--r--girepository/cmph/hash.c216
-rw-r--r--girepository/cmph/hash.h76
-rw-r--r--girepository/cmph/hash_state.h12
-rw-r--r--girepository/cmph/hashtree.c289
-rw-r--r--girepository/cmph/hashtree.h19
-rw-r--r--girepository/cmph/hashtree_structs.h32
-rw-r--r--girepository/cmph/jenkins_hash.c297
-rw-r--r--girepository/cmph/jenkins_hash.h65
-rw-r--r--girepository/cmph/main.c342
-rw-r--r--girepository/cmph/miller_rabin.c67
-rw-r--r--girepository/cmph/miller_rabin.h5
-rw-r--r--girepository/cmph/sdbm_hash.c49
-rw-r--r--girepository/cmph/sdbm_hash.h18
-rw-r--r--girepository/cmph/select.c337
-rw-r--r--girepository/cmph/select.h61
-rw-r--r--girepository/cmph/select_lookup_tables.h170
-rw-r--r--girepository/cmph/vqueue.c51
-rw-r--r--girepository/cmph/vqueue.h18
-rw-r--r--girepository/cmph/vstack.c79
-rw-r--r--girepository/cmph/vstack.h18
-rw-r--r--girepository/cmph/wingetopt.c179
-rw-r--r--girepository/cmph/wingetopt.h25
77 files changed, 12124 insertions, 0 deletions
diff --git a/girepository/cmph/README-CMPH-IMPORT.txt b/girepository/cmph/README-CMPH-IMPORT.txt
new file mode 100644
index 00000000..a1c23c24
--- /dev/null
+++ b/girepository/cmph/README-CMPH-IMPORT.txt
@@ -0,0 +1,5 @@
+This import of CMPH was made from revision bfdcc3a3a18dfb9 of
+git://cmph.git.sourceforge.net/gitroot/cmph/cmph
+
+Only the following files were taken, and everything else deleted:
+COPYING src/*.[ch]
diff --git a/girepository/cmph/bdz.c b/girepository/cmph/bdz.c
new file mode 100755
index 00000000..f422c8f9
--- /dev/null
+++ b/girepository/cmph/bdz.c
@@ -0,0 +1,703 @@
+#include "bdz.h"
+#include "cmph_structs.h"
+#include "bdz_structs.h"
+#include "hash.h"
+#include "bitbool.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+//#define DEBUG
+#include "debug.h"
+#define UNASSIGNED 3U
+#define NULL_EDGE 0xffffffff
+
+//cmph_uint32 ngrafos = 0;
+//cmph_uint32 ngrafos_aciclicos = 0;
+// Lookup table indexed by one byte of the packed g array (four 2-bit vertex
+// values per byte). Entry i is the number of 2-bit fields of i that differ
+// from 3 (UNASSIGNED), i.e. how many assigned vertices that byte holds.
+const cmph_uint8 bdz_lookup_table[] =
+{
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
+};
+
+// One hyperedge of the 3-graph: its three endpoint vertices and, for each
+// endpoint, the link to the next edge in that vertex's adjacency list.
+typedef struct
+{
+ cmph_uint32 vertices[3]; // the three vertex ids joined by this edge
+ cmph_uint32 next_edges[3]; // next_edges[i]: next edge incident to vertices[i], or NULL_EDGE
+}bdz_edge_t;
+
+// Queue of edge indices, kept as a plain heap-allocated cmph_uint32 array.
+typedef cmph_uint32 * bdz_queue_t;
+
+/* Allocate an array able to hold nedges edge indices and store it in *queuep.
+ * The malloc() result is not checked here, so *queuep is NULL on failure. */
+static void bdz_alloc_queue(bdz_queue_t * queuep, cmph_uint32 nedges)
+{
+    *queuep = malloc(nedges * sizeof(cmph_uint32));
+}
+/* Release the array allocated by bdz_alloc_queue(). The caller's pointer is
+ * left unchanged (dangling), so it must not be used afterwards. */
+static void bdz_free_queue(bdz_queue_t * queue)
+{
+    free(*queue);
+}
+
+// 3-hypergraph stored as per-vertex singly linked edge lists.
+typedef struct
+{
+ cmph_uint32 nedges; // number of edges added so far
+ bdz_edge_t * edges; // edge storage, indexed by edge id
+ cmph_uint32 * first_edge; // per vertex: head of its edge list (all bits set when empty)
+ cmph_uint8 * vert_degree; // per vertex: number of incident edges still in the graph
+}bdz_graph3_t;
+
+
+/* Allocate the three arrays of graph3 for nedges edges and nvertices
+ * vertices. Nothing is initialised (see bdz_init_graph3) and the malloc()
+ * results are not checked, so members are NULL if an allocation fails. */
+static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
+{
+    graph3->edges = malloc(nedges * sizeof(bdz_edge_t));
+    graph3->first_edge = malloc(nvertices * sizeof(cmph_uint32));
+    graph3->vert_degree = malloc((size_t)nvertices);
+}
+/* Reset graph3 to an empty graph over nvertices vertices: every adjacency
+ * list head becomes 0xffffffff (NULL_EDGE), every degree 0, and the edge
+ * count 0. nedges is accepted for symmetry with bdz_alloc_graph3 but unused. */
+static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
+{
+    (void)nedges;
+    memset(graph3->first_edge, 0xff, nvertices * sizeof(cmph_uint32));
+    memset(graph3->vert_degree, 0, (size_t)nvertices);
+    graph3->nedges = 0;
+}
+/* Free all three arrays owned by graph3. Safe to call after
+ * bdz_partial_free_graph3(), which leaves the already-freed members NULL. */
+static void bdz_free_graph3(bdz_graph3_t *graph3)
+{
+    free(graph3->edges);
+    free(graph3->first_edge);
+    free(graph3->vert_degree);
+}
+
+/* Free only the per-vertex arrays (adjacency heads and degrees), keeping the
+ * edge array alive. The freed members are set to NULL so that a later
+ * bdz_free_graph3() does not free them twice. */
+static void bdz_partial_free_graph3(bdz_graph3_t *graph3)
+{
+    free(graph3->first_edge);
+    graph3->first_edge = NULL;
+    free(graph3->vert_degree);
+    graph3->vert_degree = NULL;
+}
+
+/* Append the edge (v0, v1, v2) to graph3. The new edge is pushed at the head
+ * of the adjacency list of each of its vertices, and each vertex degree is
+ * incremented. No bounds checks are made on the edge or vertex arrays. */
+static void bdz_add_edge(bdz_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2)
+{
+    const cmph_uint32 id = graph3->nedges;
+    bdz_edge_t *edge = &graph3->edges[id];
+
+    edge->vertices[0] = v0;
+    edge->vertices[1] = v1;
+    edge->vertices[2] = v2;
+
+    /* Remember the previous list heads before overwriting them below. */
+    edge->next_edges[0] = graph3->first_edge[v0];
+    edge->next_edges[1] = graph3->first_edge[v1];
+    edge->next_edges[2] = graph3->first_edge[v2];
+
+    graph3->first_edge[v2] = id;
+    graph3->first_edge[v1] = id;
+    graph3->first_edge[v0] = id;
+
+    graph3->vert_degree[v0]++;
+    graph3->vert_degree[v1]++;
+    graph3->vert_degree[v2]++;
+    graph3->nedges = id + 1;
+}
+
+/* Debug helper: print the first nedges edges (vertices and next-edge links)
+ * and the adjacency list head of the first nvertices vertices to stdout.
+ * All values are unsigned, so they are printed with %u; an empty link shows
+ * up as 4294967295 (NULL_EDGE). */
+static void bdz_dump_graph(bdz_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
+{
+    cmph_uint32 i; /* unsigned, to match the unsigned loop bounds */
+
+    for (i = 0; i < nedges; i++) {
+        printf("\nedge %u %u %u %u ", i, graph3->edges[i].vertices[0],
+               graph3->edges[i].vertices[1], graph3->edges[i].vertices[2]);
+        printf(" nexts %u %u %u", graph3->edges[i].next_edges[0],
+               graph3->edges[i].next_edges[1], graph3->edges[i].next_edges[2]);
+    }
+
+    for (i = 0; i < nvertices; i++) {
+        printf("\nfirst for vertice %u %u ", i, graph3->first_edge[i]);
+    }
+}
+
+// Unlink curr_edge from the adjacency list of each of its three vertices and
+// decrement the degree of each vertex. If the edge is not found in one of the
+// lists, an error is printed, the graph is dumped and the process exits.
+static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
+{
+ cmph_uint32 i,j=0,vert,edge1,edge2;
+ for(i=0;i<3;i++){
+ vert=graph3->edges[curr_edge].vertices[i];
+ edge1=graph3->first_edge[vert];
+ edge2=NULL_EDGE;
+ // Walk vert's list until curr_edge is reached; edge2 trails as the
+ // predecessor and j is the slot that vert occupies inside edge2.
+ while(edge1!=curr_edge&&edge1!=NULL_EDGE){
+ edge2=edge1;
+ if(graph3->edges[edge1].vertices[0]==vert){
+ j=0;
+ } else if(graph3->edges[edge1].vertices[1]==vert){
+ j=1;
+ } else
+ j=2;
+ edge1=graph3->edges[edge1].next_edges[j];
+ };
+ if(edge1==NULL_EDGE){
+ printf("\nerror remove edge %d dump graph",curr_edge);
+ // NOTE(review): the vertex count passed here is nedges + nedges/4, not the
+ // real number of vertices -- confirm it cannot exceed the first_edge array.
+ bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
+ exit(-1);
+ };
+
+ // Splice curr_edge out: the predecessor (or the list head, when there is
+ // no predecessor) now points to curr_edge's successor for this vertex.
+ if(edge2!=NULL_EDGE){
+ graph3->edges[edge2].next_edges[j] =
+ graph3->edges[edge1].next_edges[i];
+ } else
+ graph3->first_edge[vert]=
+ graph3->edges[edge1].next_edges[i];
+ graph3->vert_degree[vert]--;
+ };
+
+};
+
+/* Peel the hypergraph: repeatedly pick an edge that has a vertex of degree 1,
+ * append it to queue and remove it from the graph, until no such edge is
+ * left. queue must have room for nedges entries.
+ *
+ * Returns 0 when every one of the nedges edges was queued, and a negative
+ * number otherwise (some edges could not be peeled, or the internal bitmap
+ * could not be allocated). nvertices is currently unused. */
+static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
+{
+    cmph_uint32 i, k;
+    cmph_uint32 queue_head = 0, queue_tail = 0;
+    /* One bit per edge, set as soon as the edge has been pushed on the queue. */
+    cmph_uint8 *marked_edge = calloc((size_t)(nedges >> 3) + 1, sizeof(cmph_uint8));
+
+    (void)nvertices;
+    if (marked_edge == NULL) {
+        return -1; /* reported as a failure instead of dereferencing NULL */
+    }
+
+    /* Seed the queue with every edge that already has a degree-1 vertex. */
+    for (i = 0; i < nedges; i++) {
+        const cmph_uint32 *v = graph3->edges[i].vertices;
+        if (graph3->vert_degree[v[0]] == 1 ||
+            graph3->vert_degree[v[1]] == 1 ||
+            graph3->vert_degree[v[2]] == 1) {
+            if (!GETBIT(marked_edge, i)) {
+                queue[queue_head++] = i;
+                SETBIT(marked_edge, i);
+            }
+        }
+    }
+
+    /* Remove queued edges one by one; a vertex whose degree drops to 1 makes
+     * its single remaining edge eligible for peeling. */
+    while (queue_tail != queue_head) {
+        const cmph_uint32 curr_edge = queue[queue_tail++];
+        bdz_remove_edge(graph3, curr_edge);
+        for (k = 0; k < 3; k++) {
+            const cmph_uint32 vert = graph3->edges[curr_edge].vertices[k];
+            if (graph3->vert_degree[vert] == 1) {
+                const cmph_uint32 tmp_edge = graph3->first_edge[vert];
+                if (!GETBIT(marked_edge, tmp_edge)) {
+                    queue[queue_head++] = tmp_edge;
+                    SETBIT(marked_edge, tmp_edge);
+                }
+            }
+        }
+    }
+
+    free(marked_edge);
+    return (int)(queue_head - nedges); /* 0 if successful, otherwise a negative number */
+}
+
+// Forward declarations of the construction steps used by bdz_new() and of the
+// rank helper used by the search functions; all are defined later in this file.
+static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue);
+static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue);
+static void ranking(bdz_config_data_t *bdz);
+static cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex);
+
+/* Create a zero-filled BDZ configuration with its defaults applied: Jenkins
+ * hash, b = 7, and no g array, hash state or rank table yet. The allocation
+ * is only guarded by assert(). The caller owns the returned object. */
+bdz_config_data_t *bdz_config_new()
+{
+    bdz_config_data_t *cfg = (bdz_config_data_t *)calloc((size_t)1, sizeof(bdz_config_data_t));
+    assert(cfg);
+
+    cfg->hashfunc = CMPH_HASH_JENKINS;
+    cfg->g = NULL;
+    cfg->hl = NULL;
+    cfg->ranktable = NULL;    /* rank table, built later */
+    cfg->ranktablesize = 0;   /* number of entries in ranktable, $n/k + 1$ */
+    cfg->k = 0;               /* kth index in ranktable, $k = log_2(n=3r)/\varepsilon$ */
+    cfg->b = 7;               /* number of bits of k */
+    return cfg;
+}
+
+/* Free the BDZ-specific configuration attached to mph. Only the config
+ * structure itself is released; members it points to are not touched. */
+void bdz_config_destroy(cmph_config_t *mph)
+{
+    bdz_config_data_t *cfg = (bdz_config_data_t *)mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(cfg);
+}
+
+/* Set parameter b of the BDZ configuration. Accepted values are 3..10;
+ * anything outside that range falls back to the default of 7. */
+void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
+{
+    bdz_config_data_t *cfg = (bdz_config_data_t *)mph->data;
+
+    if (!(b > 2 && b <= 10))
+    {
+        b = 7; /* out of range: use the default */
+    }
+    cfg->b = (cmph_uint8)b;
+    DEBUGP("b: %u\n", b);
+}
+
+/* Select the hash function used by BDZ. hashfuncs is a list terminated by
+ * CMPH_HASH_COUNT; bdz only uses one linear hash function, so only the first
+ * entry is taken and an empty list leaves the configuration unchanged. */
+void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    bdz_config_data_t *cfg = (bdz_config_data_t *)mph->data;
+
+    if (*hashfuncs != CMPH_HASH_COUNT)
+    {
+        cfg->hashfunc = *hashfuncs;
+    }
+}
+
+// Build a BDZ minimal perfect hash function for the keys in mph->key_source.
+//
+// c is the ratio of vertices to keys (0 selects the default 1.23). The build
+// has three steps: mapping (hash every key to a 3-edge and peel the graph,
+// retrying with a fresh hash state up to 1000 times), assigning (fill the
+// 2-bit g array) and ranking (build the rank table).
+//
+// Returns a newly allocated cmph_t that takes over g, hl and ranktable from
+// the configuration, or NULL when no peelable graph was found.
+// NOTE(review): none of the allocations made here or in the helpers are
+// checked for failure.
+cmph_t *bdz_new(cmph_config_t *mph, double c)
+{
+ cmph_t *mphf = NULL;
+ bdz_data_t *bdzf = NULL;
+ cmph_uint32 iterations;
+ bdz_queue_t edges;
+ bdz_graph3_t graph3;
+ bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
+ #ifdef CMPH_TIMING
+ double construction_time_begin = 0.0;
+ double construction_time = 0.0;
+ ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
+ #endif
+
+
+ if (c == 0) c = 1.23; // validating restrictions over parameter c.
+ DEBUGP("c: %f\n", c);
+ // m = number of keys (edges); r = vertices per partition, forced to be odd;
+ // n = 3*r = total number of vertices.
+ bdz->m = mph->key_source->nkeys;
+ bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
+ if ((bdz->r % 2) == 0) bdz->r+=1;
+ bdz->n = 3*bdz->r;
+
+ // k = 2^b vertices are covered by each rank table entry.
+ bdz->k = (1U << bdz->b);
+ DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);
+
+ bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
+ DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);
+
+
+ bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
+ bdz_alloc_queue(&edges,bdz->m);
+ DEBUGP("Created hypergraph\n");
+
+ DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);
+
+ // Mapping step
+ iterations = 1000;
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
+ }
+ // Each attempt uses a new hash state; a failed attempt destroys it and
+ // consumes one of the remaining iterations.
+ while(1)
+ {
+ int ok;
+ DEBUGP("linear hash function \n");
+ bdz->hl = hash_state_new(bdz->hashfunc, 15);
+
+ ok = bdz_mapping(mph, &graph3, edges);
+ //ok = 0;
+ if (!ok)
+ {
+ --iterations;
+ hash_state_destroy(bdz->hl);
+ bdz->hl = NULL;
+ DEBUGP("%u iterations remaining\n", iterations);
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
+ }
+ if (iterations == 0) break;
+ }
+ else break;
+ }
+
+ // iterations only reaches 0 when every attempt failed.
+ if (iterations == 0)
+ {
+ bdz_free_queue(&edges);
+ bdz_free_graph3(&graph3);
+ return NULL;
+ }
+ // The per-vertex arrays are no longer needed; assigning() only reads the
+ // edge array and the edge count.
+ bdz_partial_free_graph3(&graph3);
+ // Assigning step
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
+ }
+ assigning(bdz, &graph3, edges);
+
+ bdz_free_queue(&edges);
+ bdz_free_graph3(&graph3);
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
+ }
+ ranking(bdz);
+ #ifdef CMPH_TIMING
+ ELAPSED_TIME_IN_SECONDS(&construction_time);
+ #endif
+ // Package the result: the new bdz_data_t takes over the buffers built in
+ // the configuration, whose pointers are cleared.
+ mphf = (cmph_t *)malloc(sizeof(cmph_t));
+ mphf->algo = mph->algo;
+ bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
+ bdzf->g = bdz->g;
+ bdz->g = NULL; //transfer memory ownership
+ bdzf->hl = bdz->hl;
+ bdz->hl = NULL; //transfer memory ownership
+ bdzf->ranktable = bdz->ranktable;
+ bdz->ranktable = NULL; //transfer memory ownership
+ bdzf->ranktablesize = bdz->ranktablesize;
+ bdzf->k = bdz->k;
+ bdzf->b = bdz->b;
+ bdzf->n = bdz->n;
+ bdzf->m = bdz->m;
+ bdzf->r = bdz->r;
+ mphf->data = bdzf;
+ mphf->size = bdz->m;
+
+ DEBUGP("Successfully generated minimal perfect hash\n");
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+ }
+
+
+ #ifdef CMPH_TIMING
+ register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
+ register cmph_uint32 keys_per_bucket = 1;
+ construction_time = construction_time - construction_time_begin;
+ fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
+ #endif
+
+ return mphf;
+}
+
+
+/* Mapping step: reset graph3, turn every key of the key source into one
+ * 3-edge (one vertex in each of the three ranges [0,r), [r,2r), [2r,3r))
+ * using the current hash state, then try to peel the graph into queue.
+ * Returns non-zero when all edges were peeled, 0 otherwise. */
+static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
+{
+    bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
+    cmph_uint32 hashes[3];
+    cmph_uint32 key_idx;
+
+    bdz_init_graph3(graph3, bdz->m, bdz->n);
+    mph->key_source->rewind(mph->key_source->data);
+
+    for (key_idx = 0; key_idx < mph->key_source->nkeys; ++key_idx)
+    {
+        char *key = NULL;
+        cmph_uint32 keylen;
+
+        mph->key_source->read(mph->key_source->data, &key, &keylen);
+        hash_vector(bdz->hl, key, keylen, hashes);
+        /* The hash values are already stored, so the key can be released. */
+        mph->key_source->dispose(mph->key_source->data, key, keylen);
+
+        bdz_add_edge(graph3,
+                     hashes[0] % bdz->r,
+                     hashes[1] % bdz->r + bdz->r,
+                     hashes[2] % bdz->r + (bdz->r << 1));
+    }
+
+    return bdz_generate_queue(bdz->m, bdz->n, queue, graph3) == 0;
+}
+
+// Assigning step: allocate bdz->g (2 bits per vertex, ceil(n/4) bytes) and
+// walk the peeling queue backwards, giving one not-yet-marked vertex of each
+// edge a value chosen so that the sum of the edge's three g values modulo 3
+// is the position (0, 1 or 2) of that vertex within the edge. The search
+// functions use the same sum to pick the vertex of a key.
+// NOTE(review): neither allocation is checked for failure.
+static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue)
+{
+ cmph_uint32 i;
+ cmph_uint32 nedges=graph3->nedges;
+ cmph_uint32 curr_edge;
+ cmph_uint32 v0,v1,v2;
+ // One bit per vertex: set once the vertex has received its final g value.
+ cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1);
+ cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
+ bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
+ memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1);
+ // 0xff sets every 2-bit field to 3, i.e. UNASSIGNED.
+ memset(bdz->g, 0xff, (size_t)(sizeg));
+
+ // Counts down from nedges-1 to 0; the i+1>=1 test stops the loop when the
+ // unsigned i wraps around after 0.
+ for(i=nedges-1;i+1>=1;i--){
+ curr_edge=queue[i];
+ v0=graph3->edges[curr_edge].vertices[0];
+ v1=graph3->edges[curr_edge].vertices[1];
+ v2=graph3->edges[curr_edge].vertices[2];
+ DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
+ if(!GETBIT(marked_vertices, v0)){
+ // v0 is free: the other unmarked vertices are fixed as UNASSIGNED and
+ // v0 is chosen so that the sum is 0 modulo 3.
+ if(!GETBIT(marked_vertices,v1))
+ {
+ SETVALUE1(bdz->g, v1, UNASSIGNED);
+ SETBIT(marked_vertices, v1);
+ }
+ if(!GETBIT(marked_vertices,v2))
+ {
+ SETVALUE1(bdz->g, v2, UNASSIGNED);
+ SETBIT(marked_vertices, v2);
+ }
+ SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
+ SETBIT(marked_vertices, v0);
+ } else if(!GETBIT(marked_vertices, v1)) {
+ // v1 is free: chosen so that the sum is 1 modulo 3.
+ if(!GETBIT(marked_vertices, v2))
+ {
+ SETVALUE1(bdz->g, v2, UNASSIGNED);
+ SETBIT(marked_vertices, v2);
+ }
+ SETVALUE1(bdz->g, v1, (7-(GETVALUE(bdz->g, v0)+GETVALUE(bdz->g, v2)))%3);
+ SETBIT(marked_vertices, v1);
+ }else {
+ // Only v2 is left: chosen so that the sum is 2 modulo 3.
+ SETVALUE1(bdz->g, v2, (8-(GETVALUE(bdz->g,v0)+GETVALUE(bdz->g, v1)))%3);
+ SETBIT(marked_vertices, v2);
+ }
+ DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
+ };
+ free(marked_vertices);
+}
+
+
+/* Ranking step: allocate bdz->ranktable (ranktablesize entries) and fill it
+ * so that entry i holds the number of assigned vertices found in the first
+ * i blocks of g, each block being k/4 bytes (k vertices) long. Entry 0 is
+ * always 0. The calloc() result is not checked. */
+static void ranking(bdz_config_data_t *bdz)
+{
+    const cmph_uint32 block_bytes = (bdz->k >> 2U);
+    cmph_uint32 bytes_left = (cmph_uint32)ceil(bdz->n/4.0);
+    cmph_uint32 g_pos = 0U;
+    cmph_uint32 assigned = 0U;
+    cmph_uint32 slot, j;
+
+    bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
+    // ranktable computation
+    bdz->ranktable[0] = 0;
+    for (slot = 1; slot != bdz->ranktablesize; slot++)
+    {
+        /* Never read past the end of g on a short final block. */
+        cmph_uint32 chunk = bytes_left;
+        if (block_bytes < bytes_left)
+        {
+            chunk = block_bytes;
+        }
+        for (j = 0; j < chunk; j++)
+        {
+            assigned += bdz_lookup_table[bdz->g[g_pos + j]];
+        }
+        bdz->ranktable[slot] = assigned;
+        g_pos += chunk;
+        bytes_left -= block_bytes;
+    }
+}
+
+
+/* Write exactly size bytes from ptr to fd. A zero-length request writes
+ * nothing and counts as success. Returns 1 on success, 0 on a short write. */
+static int bdz_write_exact(const void *ptr, size_t size, FILE *fd)
+{
+    return size == 0 || fwrite(ptr, size, (size_t)1, fd) == 1;
+}
+
+/* Serialise mphf to fd: the generic header written by __cmph_dump(), then
+ * the hash state (length + bytes), n, m, r, the g array (ceil(n/4) bytes),
+ * k, b, ranktablesize and the rank table, all in host byte order.
+ *
+ * Returns 1 when every field was written, 0 as soon as a write fails (the
+ * remaining fields are then skipped). */
+int bdz_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    cmph_uint32 sizeg;
+    int ok;
+    bdz_data_t *data = (bdz_data_t *)mphf->data;
+    __cmph_dump(mphf, fd);
+
+    hash_state_dump(data->hl, &buf, &buflen);
+    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+    ok = bdz_write_exact(&buflen, sizeof(cmph_uint32), fd)
+        && bdz_write_exact(buf, (size_t)buflen, fd);
+    free(buf); /* released on both the success and the failure path */
+
+    ok = ok && bdz_write_exact(&(data->n), sizeof(cmph_uint32), fd)
+            && bdz_write_exact(&(data->m), sizeof(cmph_uint32), fd)
+            && bdz_write_exact(&(data->r), sizeof(cmph_uint32), fd);
+
+    sizeg = (cmph_uint32)ceil(data->n/4.0);
+    ok = ok && bdz_write_exact(data->g, sizeof(cmph_uint8)*sizeg, fd);
+
+    ok = ok && bdz_write_exact(&(data->k), sizeof(cmph_uint32), fd)
+            && bdz_write_exact(&(data->b), sizeof(cmph_uint8), fd)
+            && bdz_write_exact(&(data->ranktablesize), sizeof(cmph_uint32), fd);
+
+    ok = ok && bdz_write_exact(data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize), fd);
+    #ifdef DEBUG
+    {
+        cmph_uint32 i;
+        fprintf(stderr, "G: ");
+        for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i));
+        fprintf(stderr, "\n");
+    }
+    #endif
+    return ok ? 1 : 0;
+}
+
+// Read a BDZ function from f into a newly allocated bdz_data_t attached to
+// mphf->data. Fields are read in the order bdz_dump() writes them after the
+// generic header: hash state (length + bytes), n, m, r, g, k, b,
+// ranktablesize, ranktable.
+// NOTE(review): no fread() or allocation result is checked, so a truncated
+// or corrupt file leaves the structure partially filled.
+void bdz_load(FILE *f, cmph_t *mphf)
+{
+ char *buf = NULL;
+ cmph_uint32 buflen, sizeg;
+ register size_t nbytes;
+ bdz_data_t *bdz = (bdz_data_t *)malloc(sizeof(bdz_data_t));
+
+ DEBUGP("Loading bdz mphf\n");
+ mphf->data = bdz;
+
+ // Hash state: a 32-bit length followed by that many bytes.
+ nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+ DEBUGP("Hash state has %u bytes\n", buflen);
+ buf = (char *)malloc((size_t)buflen);
+ nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+ bdz->hl = hash_state_load(buf, buflen);
+ free(buf);
+
+
+ DEBUGP("Reading m and n\n");
+ nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
+ // g holds 2 bits per vertex, hence ceil(n/4) bytes.
+ sizeg = (cmph_uint32)ceil(bdz->n/4.0);
+ bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
+ nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
+
+ nbytes = fread(&(bdz->k), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(bdz->b), sizeof(cmph_uint8), (size_t)1, f);
+ nbytes = fread(&(bdz->ranktablesize), sizeof(cmph_uint32), (size_t)1, f);
+
+ bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
+ nbytes = fread(bdz->ranktable, sizeof(cmph_uint32)*(bdz->ranktablesize), (size_t)1, f);
+
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ fprintf(stderr, "G: ");
+ for (i = 0; i < bdz->n; ++i) fprintf(stderr, "%u ", GETVALUE(bdz->g,i));
+ fprintf(stderr, "\n");
+ #endif
+ return;
+}
+
+
+/* Perfect (non-minimal) lookup: hash key to its three candidate vertices and
+ * return the vertex selected by the sum of their g values modulo 3, without
+ * applying the rank step. */
+cmph_uint32 bdz_search_ph(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    bdz_data_t *bdz = mphf->data;
+    cmph_uint32 hl[3];
+    cmph_uint32 choice;
+
+    hash_vector(bdz->hl, key, keylen, hl);
+    /* One candidate vertex in each of [0,r), [r,2r) and [2r,3r). */
+    hl[0] = hl[0] % bdz->r;
+    hl[1] = hl[1] % bdz->r + bdz->r;
+    hl[2] = hl[2] % bdz->r + (bdz->r << 1);
+
+    choice = (GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3;
+    return hl[choice];
+}
+
+/* Count the assigned vertices (g value != UNASSIGNED) that precede vertex.
+ * The count starts from the rank table entry of the 2^b-vertex block that
+ * contains vertex, adds whole g bytes through bdz_lookup_table, and finishes
+ * with the individual 2-bit values of the last, partial byte. */
+static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
+{
+    const cmph_uint32 block = vertex >> b;
+    const cmph_uint32 last_byte = vertex >> 2;
+    cmph_uint32 byte_idx = (block << b) >> 2;
+    cmph_uint32 total = ranktable[block];
+    cmph_uint32 v;
+
+    /* Whole bytes between the block start and the byte holding vertex. */
+    for (; byte_idx < last_byte; byte_idx++)
+    {
+        total += bdz_lookup_table[g[byte_idx]];
+    }
+
+    /* Remaining vertices inside the byte that holds vertex. */
+    for (v = byte_idx << 2; v < vertex; v++)
+    {
+        if (GETVALUE(g, v) != UNASSIGNED)
+        {
+            total++;
+        }
+    }
+
+    return total;
+}
+
+/* Minimal perfect lookup: select the vertex of key exactly as
+ * bdz_search_ph() does, then return the rank of that vertex, i.e. the
+ * number of assigned vertices that precede it. */
+cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    bdz_data_t *bdz = mphf->data;
+    cmph_uint32 hl[3];
+    cmph_uint32 choice;
+
+    hash_vector(bdz->hl, key, keylen, hl);
+    /* One candidate vertex in each of [0,r), [r,2r) and [2r,3r). */
+    hl[0] = hl[0] % bdz->r;
+    hl[1] = hl[1] % bdz->r + bdz->r;
+    hl[2] = hl[2] % bdz->r + (bdz->r << 1);
+
+    choice = (GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3;
+    return rank(bdz->b, bdz->ranktable, bdz->g, hl[choice]);
+}
+
+
+/* Release a BDZ function: its g array, hash state and rank table, then the
+ * bdz_data_t itself and finally the cmph_t wrapper. mphf must not be used
+ * after this call. */
+void bdz_destroy(cmph_t *mphf)
+{
+    bdz_data_t *bdz = (bdz_data_t *)mphf->data;
+
+    free(bdz->g);
+    hash_state_destroy(bdz->hl);
+    free(bdz->ranktable);
+
+    free(bdz);
+    free(mphf);
+}
+
+/** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ *
+ * Layout written, in order: hash type (32 bits), packed hash state, r (32 bits),
+ * ranktablesize (32 bits), the rank table, b (1 byte) and the g array
+ * (ceil(n/4) bytes). The 32-bit fields are stored with memcpy() so that no
+ * particular alignment of packed_mphf or of the hash state size is required.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void bdz_pack(cmph_t *mphf, void *packed_mphf)
+{
+    bdz_data_t *data = (bdz_data_t *)mphf->data;
+    cmph_uint8 * ptr = packed_mphf;
+    cmph_uint32 word;
+    cmph_uint32 sizeg;
+
+    // packing hl type
+    CMPH_HASH hl_type = hash_get_type(data->hl);
+    word = (cmph_uint32)hl_type;
+    memcpy(ptr, &word, sizeof(cmph_uint32));
+    ptr += sizeof(cmph_uint32);
+
+    // packing hl
+    hash_state_pack(data->hl, ptr);
+    ptr += hash_state_packed_size(hl_type);
+
+    // packing r
+    word = data->r;
+    memcpy(ptr, &word, sizeof(cmph_uint32));
+    ptr += sizeof(data->r);
+
+    // packing ranktablesize
+    word = data->ranktablesize;
+    memcpy(ptr, &word, sizeof(cmph_uint32));
+    ptr += sizeof(data->ranktablesize);
+
+    // packing ranktable
+    memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
+    ptr += sizeof(cmph_uint32)*(data->ranktablesize);
+
+    // packing b
+    *ptr++ = data->b;
+
+    // packing g
+    sizeg = (cmph_uint32)ceil(data->n/4.0);
+    memcpy(ptr, data->g, sizeof(cmph_uint8)*sizeg);
+}
+
+/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ *
+ * The total is the sum of: sizeof(CMPH_ALGO), the packed hash state, three
+ * 32-bit words, the rank table, one byte for b and ceil(n/4) bytes for g.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 bdz_packed_size(cmph_t *mphf)
+{
+    bdz_data_t *data = (bdz_data_t *)mphf->data;
+    CMPH_HASH hl_type = hash_get_type(data->hl);
+    size_t total = sizeof(CMPH_ALGO);
+
+    total += hash_state_packed_size(hl_type);
+    total += 3*sizeof(cmph_uint32);
+    total += sizeof(cmph_uint32)*(data->ranktablesize);
+    total += sizeof(cmph_uint8);
+    total += sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0));
+
+    return (cmph_uint32)total;
+}
+
+/** \fn cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ *
+ * Reads the fields in the order bdz_pack() writes them: hash type, packed
+ * hash state, r, ranktablesize, rank table, b, g.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+
+ register cmph_uint32 vertex;
+ register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
+ // The packed hash state starts right after the 4-byte hash type.
+ register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
+
+ // r and ranktablesize precede the rank table itself; after the two reads
+ // below, ranktable points at the first rank table entry.
+ register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
+
+ register cmph_uint32 r = *ranktable++;
+ register cmph_uint32 ranktablesize = *ranktable++;
+ // b is the single byte stored just after the rank table, followed by g.
+ register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
+ register cmph_uint8 b = *g++;
+
+ cmph_uint32 hl[3];
+ hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
+ hl[0] = hl[0] % r;
+ hl[1] = hl[1] % r + r;
+ hl[2] = hl[2] % r + (r << 1);
+ // Same vertex selection and ranking as bdz_search().
+ vertex = hl[(GETVALUE(g, hl[0]) + GETVALUE(g, hl[1]) + GETVALUE(g, hl[2])) % 3];
+ return rank(b, ranktable, g, vertex);
+}
diff --git a/girepository/cmph/bdz.h b/girepository/cmph/bdz.h
new file mode 100755
index 00000000..f2b7b89c
--- /dev/null
+++ b/girepository/cmph/bdz.h
@@ -0,0 +1,43 @@
+#ifndef __CMPH_BDZ_H__
+#define __CMPH_BDZ_H__
+
+#include "cmph.h"
+
+/* Opaque types; the definitions are not part of this header. */
+typedef struct __bdz_data_t bdz_data_t;
+typedef struct __bdz_config_data_t bdz_config_data_t;
+
+/* Configuration: create/destroy the BDZ-specific settings and set the hash
+ * function list or parameter b; bdz_new() builds the function (c is the
+ * vertices-to-keys ratio). */
+bdz_config_data_t *bdz_config_new();
+void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void bdz_config_destroy(cmph_config_t *mph);
+void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b);
+cmph_t *bdz_new(cmph_config_t *mph, double c);
+
+/* Serialisation to/from a FILE, destruction, and lookup of a key. */
+void bdz_load(FILE *f, cmph_t *mphf);
+int bdz_dump(cmph_t *mphf, FILE *f);
+void bdz_destroy(cmph_t *mphf);
+cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void bdz_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 bdz_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/bdz_gen_lookup_table.c b/girepository/cmph/bdz_gen_lookup_table.c
new file mode 100755
index 00000000..b8f66068
--- /dev/null
+++ b/girepository/cmph/bdz_gen_lookup_table.c
@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* Print the usage line for program prname to stderr and terminate the
+ * process with exit status 1. Does not return. */
+void help(char * prname)
+{
+    fprintf(stderr,
+            "USE: %s <n><wordsizeinbits>\n",
+            prname);
+    exit(1);
+}
+
+/* Number of 2-bit fields, among the lowest nfields fields of word, whose
+ * value is not 3. */
+static int count_assigned(int word, int nfields)
+{
+    int assigned = 0;
+    int f;
+    for (f = 0; f < nfields; f++, word >>= 2)
+    {
+        if ((word & 0x0003) != 3)
+        {
+            assigned++;
+        }
+    }
+    return assigned;
+}
+
+/* Generate a lookup table: for every i in [0, n), print to stderr how many
+ * 2-bit fields of i differ from 3, 16 entries per line. argv[1] is n and
+ * argv[2] the word size in bits (half of it is the number of fields). */
+int main(int argc, char ** argv)
+{
+    int n, wordsize, i;
+
+    if (argc != 3) help(argv[0]);
+    n = atoi(argv[1]);
+    wordsize = atoi(argv[2]) >> 1;
+
+    for (i = 0; i < n; i++)
+    {
+        if (i % 16 == 0) fprintf(stderr, "\n");
+        fprintf(stderr, "%d, ", count_assigned(i, wordsize));
+    }
+    fprintf(stderr, "\n");
+    return 0;
+}
diff --git a/girepository/cmph/bdz_ph.c b/girepository/cmph/bdz_ph.c
new file mode 100755
index 00000000..49cf5646
--- /dev/null
+++ b/girepository/cmph/bdz_ph.c
@@ -0,0 +1,621 @@
+#include "bdz_ph.h"
+#include "cmph_structs.h"
+#include "bdz_structs_ph.h"
+#include "hash.h"
+#include "bitbool.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+//#define DEBUG
+#include "debug.h"
+#define UNASSIGNED 3
+#define NULL_EDGE 0xffffffff
+
+
+/* pow3_table[i] == 3^i: weight of the i-th base-3 digit when bdz_ph_optimization packs five g values into one byte. */
+static cmph_uint8 pow3_table[5] = {1,3,9,27,81};
+static cmph_uint8 lookup_table[5][256] = {
+ {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
+ {0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 2, 0, 0, 0, 1},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+};
+
+/* One edge of the 3-graph: its three vertices plus, for each of them, the link to the next edge in that vertex's edge list. */
+typedef struct
+{
+ cmph_uint32 vertices[3];
+ cmph_uint32 next_edges[3]; /* next_edges[k] continues the list headed by first_edge[vertices[k]]; NULL_EDGE terminates it */
+}bdz_ph_edge_t;
+
+typedef cmph_uint32 * bdz_ph_queue_t;
+
+/* Allocate room for nedges edge indices and store the pointer in *queuep (the malloc result is not checked). */
+static void bdz_ph_alloc_queue(bdz_ph_queue_t * queuep, cmph_uint32 nedges)
+{
+    bdz_ph_queue_t storage = malloc(nedges * sizeof(cmph_uint32));
+
+    *queuep = storage;
+}
+/* Release the storage a queue handle points to; the handle itself is left unchanged. */
+static void bdz_ph_free_queue(bdz_ph_queue_t * queue)
+{
+    bdz_ph_queue_t storage = *queue;
+
+    free(storage);
+}
+
+/* 3-graph stored as an edge array plus one singly linked edge list per vertex. */
+typedef struct
+{
+ cmph_uint32 nedges; /* number of edges added so far (reset by bdz_ph_init_graph3) */
+ bdz_ph_edge_t * edges; /* edge records, indexed by edge number */
+ cmph_uint32 * first_edge; /* per vertex: head of its edge list, NULL_EDGE when empty */
+ cmph_uint8 * vert_degree; /* per vertex: number of edges currently in its list */
+}bdz_ph_graph3_t;
+
+
+/*
+ * Allocate the three arrays of graph3: nedges edge records, and one
+ * list head and one degree counter per vertex. Results are not checked
+ * and the memory is left uninitialized.
+ */
+static void bdz_ph_alloc_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
+{
+    bdz_ph_edge_t *edge_store = malloc(nedges * sizeof(bdz_ph_edge_t));
+    cmph_uint32 *heads = malloc(nvertices * sizeof(cmph_uint32));
+    cmph_uint8 *degrees = malloc((size_t)nvertices);
+
+    graph3->edges = edge_store;
+    graph3->first_edge = heads;
+    graph3->vert_degree = degrees;
+}
+/*
+ * Reset graph3 to the empty graph: every edge list head becomes
+ * NULL_EDGE (all bytes 0xff), every degree 0 and the edge count 0.
+ * The nedges parameter is not used.
+ */
+static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
+{
+    const size_t heads_bytes = nvertices * sizeof(cmph_uint32);
+    const size_t degree_bytes = (size_t)nvertices;
+
+    (void)nedges;
+    graph3->nedges = 0;
+    memset(graph3->vert_degree, 0, degree_bytes);
+    memset(graph3->first_edge, 0xff, heads_bytes);
+}
+/* Free all three arrays owned by graph3 (pointers are not reset). */
+static void bdz_ph_free_graph3(bdz_ph_graph3_t *graph3)
+{
+    void *owned[3];
+    unsigned k;
+
+    owned[0] = graph3->edges;
+    owned[1] = graph3->first_edge;
+    owned[2] = graph3->vert_degree;
+    for (k = 0; k < 3; k++)
+        free(owned[k]);
+}
+
+/*
+ * Free only the per-vertex arrays and null their pointers, so that a later
+ * bdz_ph_free_graph3() passes NULL to free() for them. The edge array is kept.
+ */
+static void bdz_ph_partial_free_graph3(bdz_ph_graph3_t *graph3)
+{
+    free(graph3->first_edge);
+    graph3->first_edge = NULL;
+
+    free(graph3->vert_degree);
+    graph3->vert_degree = NULL;
+}
+
+/*
+ * Append the edge (v0, v1, v2) as edge number graph3->nedges and push it
+ * on the front of the edge list of each of its three vertices.
+ */
+static void bdz_ph_add_edge(bdz_ph_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2)
+{
+    const cmph_uint32 slot = graph3->nedges;
+    bdz_ph_edge_t *edge = &graph3->edges[slot];
+    cmph_uint32 verts[3];
+    cmph_uint32 k;
+
+    verts[0] = v0;
+    verts[1] = v1;
+    verts[2] = v2;
+
+    /* record the vertices and the current list heads before any head is changed */
+    for (k = 0; k < 3; k++)
+    {
+        edge->vertices[k] = verts[k];
+        edge->next_edges[k] = graph3->first_edge[verts[k]];
+    }
+    /* the new edge becomes the head of each list */
+    for (k = 0; k < 3; k++)
+    {
+        graph3->first_edge[verts[k]] = slot;
+        graph3->vert_degree[verts[k]]++;
+    }
+    graph3->nedges = slot + 1;
+}
+
+/*
+ * Debug helper: print the first nedges edge records (vertices and next
+ * links) and the edge list head of the first nvertices vertices on stdout.
+ *
+ * The index is unsigned like the bounds it is compared with, and every
+ * value is printed with %u, so NULL_EDGE shows up as 4294967295 rather
+ * than -1.
+ */
+static void bdz_ph_dump_graph(bdz_ph_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
+{
+    cmph_uint32 i;
+
+    for(i=0;i<nedges;i++){
+        printf("\nedge %u %u %u %u ",i,graph3->edges[i].vertices[0],
+               graph3->edges[i].vertices[1],graph3->edges[i].vertices[2]);
+        printf(" nexts %u %u %u",graph3->edges[i].next_edges[0],
+               graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
+    }
+
+    for(i=0;i<nvertices;i++){
+        printf("\nfirst for vertice %u %u ",i,graph3->first_edge[i]);
+    }
+}
+
+/*
+ * Unlink edge curr_edge from the edge list of each of its three vertices
+ * and decrement the degree of each vertex. If the edge is not found in one
+ * of the lists, the graph is dumped and the process exits with status -1.
+ */
+static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
+{
+ cmph_uint32 i,j=0,vert,edge1,edge2;
+ for(i=0;i<3;i++){
+ vert=graph3->edges[curr_edge].vertices[i];
+ edge1=graph3->first_edge[vert];
+ edge2=NULL_EDGE;
+ /* walk vert's list; edge2 trails one step behind edge1 */
+ while(edge1!=curr_edge&&edge1!=NULL_EDGE){
+ edge2=edge1;
+ /* j = position of vert inside edge1, i.e. which next link belongs to this list */
+ if(graph3->edges[edge1].vertices[0]==vert){
+ j=0;
+ } else if(graph3->edges[edge1].vertices[1]==vert){
+ j=1;
+ } else
+ j=2;
+ edge1=graph3->edges[edge1].next_edges[j];
+ };
+ if(edge1==NULL_EDGE){
+ printf("\nerror remove edge %d dump graph",curr_edge);
+ /* NOTE(review): the vertex count passed here is nedges + nedges/4, not the real vertex count */
+ bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
+ exit(-1);
+ };
+
+ /* splice curr_edge out: the predecessor's link (or the list head) skips over it */
+ if(edge2!=NULL_EDGE){
+ graph3->edges[edge2].next_edges[j] =
+ graph3->edges[edge1].next_edges[i];
+ } else
+ graph3->first_edge[vert]=
+ graph3->edges[edge1].next_edges[i];
+ graph3->vert_degree[vert]--;
+ };
+
+};
+
+/*
+ * Fill queue with the order in which edges can be removed from graph3:
+ * an edge is enqueued once one of its vertices has degree 1, and removing
+ * it may expose further such edges. graph3 is modified by the removals.
+ *
+ * Returns queue_head - nedges: 0 when every edge was enqueued, a negative
+ * number otherwise. The nvertices parameter is not used.
+ */
+static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3)
+{
+ cmph_uint32 i,v0,v1,v2;
+ cmph_uint32 queue_head=0,queue_tail=0;
+ cmph_uint32 curr_edge;
+ cmph_uint32 tmp_edge;
+ /* one bit per edge: set once the edge has been put in the queue (malloc result is not checked) */
+ cmph_uint8 * marked_edge =malloc((size_t)(nedges >> 3) + 1);
+ memset(marked_edge, 0, (size_t)(nedges >> 3) + 1);
+
+ /* seed the queue with every edge that already has a degree-1 vertex */
+ for(i=0;i<nedges;i++){
+ v0=graph3->edges[i].vertices[0];
+ v1=graph3->edges[i].vertices[1];
+ v2=graph3->edges[i].vertices[2];
+ if(graph3->vert_degree[v0]==1 ||
+ graph3->vert_degree[v1]==1 ||
+ graph3->vert_degree[v2]==1){
+ if(!GETBIT(marked_edge,i)) {
+ queue[queue_head++]=i;
+ SETBIT(marked_edge,i);
+ }
+ };
+ };
+ /* process the queue: remove each edge, then enqueue the single remaining edge of any vertex that dropped to degree 1 */
+ while(queue_tail!=queue_head){
+ curr_edge=queue[queue_tail++];
+ bdz_ph_remove_edge(graph3,curr_edge);
+ v0=graph3->edges[curr_edge].vertices[0];
+ v1=graph3->edges[curr_edge].vertices[1];
+ v2=graph3->edges[curr_edge].vertices[2];
+ if(graph3->vert_degree[v0]==1 ) {
+ tmp_edge=graph3->first_edge[v0];
+ if(!GETBIT(marked_edge,tmp_edge)) {
+ queue[queue_head++]=tmp_edge;
+ SETBIT(marked_edge,tmp_edge);
+ };
+
+ };
+ if(graph3->vert_degree[v1]==1) {
+ tmp_edge=graph3->first_edge[v1];
+ if(!GETBIT(marked_edge,tmp_edge)){
+ queue[queue_head++]=tmp_edge;
+ SETBIT(marked_edge,tmp_edge);
+ };
+
+ };
+ if(graph3->vert_degree[v2]==1){
+ tmp_edge=graph3->first_edge[v2];
+ if(!GETBIT(marked_edge,tmp_edge)){
+ queue[queue_head++]=tmp_edge;
+ SETBIT(marked_edge,tmp_edge);
+ };
+ };
+ };
+ free(marked_edge);
+ return (int)queue_head - (int)nedges;/* returns 0 if successful otherwies return negative number*/
+};
+
+static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue);
+static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue);
+static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph);
+
+/*
+ * Create a zero-filled bdz_ph configuration whose hash function defaults
+ * to CMPH_HASH_JENKINS. Allocation failure trips the assert.
+ */
+bdz_ph_config_data_t *bdz_ph_config_new()
+{
+    bdz_ph_config_data_t *config =
+        (bdz_ph_config_data_t *)calloc((size_t)1, sizeof(bdz_ph_config_data_t));
+
+    assert(config);
+    config->g = NULL;
+    config->hl = NULL;
+    config->hashfunc = CMPH_HASH_JENKINS;
+    return config;
+}
+
+/* Free the bdz_ph configuration attached to mph->data; only the struct itself is freed, mph is left untouched. */
+void bdz_ph_config_destroy(cmph_config_t *mph)
+{
+    bdz_ph_config_data_t *config = (bdz_ph_config_data_t *)mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(config);
+}
+
+/*
+ * Select the hash function: bdz_ph uses a single one, so only the first
+ * entry of the CMPH_HASH_COUNT-terminated list is taken. An empty list
+ * leaves the current choice in place.
+ */
+void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    bdz_ph_config_data_t *config = (bdz_ph_config_data_t *)mph->data;
+
+    if (*hashfuncs != CMPH_HASH_COUNT)
+    {
+        config->hashfunc = *hashfuncs;
+    }
+}
+
+/*
+ * Build a bdz_ph function for the keys of mph->key_source.
+ *
+ * Sizes: m = number of keys, r = ceil(c*m/3) bumped to the next odd number,
+ * n = 3*r vertices; c == 0 selects 1.23. The mapping step is retried with a
+ * new hash state up to 100 times; when all attempts fail NULL is returned.
+ * On success g and hl are moved from the configuration into the returned
+ * cmph_t, which the caller owns.
+ */
+cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
+{
+ cmph_t *mphf = NULL;
+ bdz_ph_data_t *bdz_phf = NULL;
+ cmph_uint32 iterations;
+ bdz_ph_queue_t edges;
+ bdz_ph_graph3_t graph3;
+ bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
+ #ifdef CMPH_TIMING
+ double construction_time_begin = 0.0;
+ double construction_time = 0.0;
+ ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
+ #endif
+
+
+ if (c == 0) c = 1.23; // validating restrictions over parameter c.
+ DEBUGP("c: %f\n", c);
+ bdz_ph->m = mph->key_source->nkeys;
+ bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
+ /* keep r odd */
+ if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
+ bdz_ph->n = 3*bdz_ph->r;
+
+
+ /* NOTE(review): neither allocation helper checks malloc, so failures are not detected here */
+ bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n);
+ bdz_ph_alloc_queue(&edges,bdz_ph->m);
+ DEBUGP("Created hypergraph\n");
+
+ DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c);
+
+ // Mapping step
+ iterations = 100;
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz_ph->m, bdz_ph->n);
+ }
+ /* each attempt uses a fresh hash state; a failed attempt destroys it and counts down */
+ while(1)
+ {
+ int ok;
+ DEBUGP("linear hash function \n");
+ bdz_ph->hl = hash_state_new(bdz_ph->hashfunc, 15);
+
+ ok = bdz_ph_mapping(mph, &graph3, edges);
+ if (!ok)
+ {
+ --iterations;
+ hash_state_destroy(bdz_ph->hl);
+ bdz_ph->hl = NULL;
+ DEBUGP("%u iterations remaining\n", iterations);
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
+ }
+ if (iterations == 0) break;
+ }
+ else break;
+ }
+
+ /* iterations only reaches 0 through the failure branch above */
+ if (iterations == 0)
+ {
+// free(bdz_ph->g);
+ bdz_ph_free_queue(&edges);
+ bdz_ph_free_graph3(&graph3);
+ return NULL;
+ }
+ /* the per-vertex arrays are no longer needed; the edge array is still read by assigning() */
+ bdz_ph_partial_free_graph3(&graph3);
+ // Assigning step
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz_ph->m, bdz_ph->n);
+ }
+ assigning(bdz_ph, &graph3, edges);
+
+ bdz_ph_free_queue(&edges);
+ /* first_edge and vert_degree were set to NULL by the partial free, so only edges is released here */
+ bdz_ph_free_graph3(&graph3);
+
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Starting optimization step\n");
+ }
+
+ /* repack g from 2-bit fields into five base-3 digits per byte */
+ bdz_ph_optimization(bdz_ph);
+
+ #ifdef CMPH_TIMING
+ ELAPSED_TIME_IN_SECONDS(&construction_time);
+ #endif
+ /* NOTE(review): the two malloc results below are used without a NULL check */
+ mphf = (cmph_t *)malloc(sizeof(cmph_t));
+ mphf->algo = mph->algo;
+ bdz_phf = (bdz_ph_data_t *)malloc(sizeof(bdz_ph_data_t));
+ bdz_phf->g = bdz_ph->g;
+ bdz_ph->g = NULL; //transfer memory ownership
+ bdz_phf->hl = bdz_ph->hl;
+ bdz_ph->hl = NULL; //transfer memory ownership
+ bdz_phf->n = bdz_ph->n;
+ bdz_phf->m = bdz_ph->m;
+ bdz_phf->r = bdz_ph->r;
+ mphf->data = bdz_phf;
+ mphf->size = bdz_ph->n;
+
+ DEBUGP("Successfully generated minimal perfect hash\n");
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+ }
+
+ #ifdef CMPH_TIMING
+ register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8;
+ register cmph_uint32 keys_per_bucket = 1;
+ construction_time = construction_time - construction_time_begin;
+ fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m);
+ #endif
+
+ return mphf;
+}
+
+
+/*
+ * Mapping step: reset graph3, turn every key of the key source into one
+ * edge (one vertex in each third [0,r), [r,2r), [2r,3r) of the vertex
+ * range) and try to order all edges for removal.
+ *
+ * Returns 1 when bdz_ph_generate_queue() reports 0, otherwise 0.
+ */
+static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
+{
+    bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
+    cmph_uint32 hashes[3];
+    cmph_uint32 key_idx;
+    int leftover;
+
+    bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n);
+    mph->key_source->rewind(mph->key_source->data);
+
+    for (key_idx = 0; key_idx < mph->key_source->nkeys; ++key_idx)
+    {
+        char *key = NULL;
+        cmph_uint32 keylen;
+        cmph_uint32 va, vb, vc;
+
+        mph->key_source->read(mph->key_source->data, &key, &keylen);
+        hash_vector(bdz_ph->hl, key, keylen, hashes);
+
+        va = hashes[0] % bdz_ph->r;
+        vb = hashes[1] % bdz_ph->r + bdz_ph->r;
+        vc = hashes[2] % bdz_ph->r + (bdz_ph->r << 1);
+
+        mph->key_source->dispose(mph->key_source->data, key, keylen);
+        bdz_ph_add_edge(graph3, va, vb, vc);
+    }
+
+    leftover = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3);
+    return (leftover == 0);
+}
+
+/*
+ * Assigning step: allocate bdz_ph->g (2 bits per vertex, zero-filled) and
+ * walk the queue from its last entry to its first. For each edge the first
+ * not-yet-marked vertex (checked in the order v0, v1, v2) receives a value
+ * such that (g[v0] + g[v1] + g[v2]) % 3 equals that vertex's position in
+ * the edge; vertices are marked as they are visited.
+ */
+static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
+{
+ cmph_uint32 i;
+ cmph_uint32 nedges=graph3->nedges;
+ cmph_uint32 curr_edge;
+ cmph_uint32 v0,v1,v2;
+ /* one bit per vertex (malloc result is not checked) */
+ cmph_uint8 * marked_vertices =malloc((size_t)(bdz_ph->n >> 3) + 1);
+ /* four 2-bit values per byte */
+ cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0);
+ bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
+ memset(marked_vertices, 0, (size_t)(bdz_ph->n >> 3) + 1);
+ //memset(bdz_ph->g, 0xff, sizeg);
+
+ /* i is unsigned: "i+1>=1" turns false once i wraps past 0, and also when nedges is 0 */
+ for(i=nedges-1;i+1>=1;i--){
+ curr_edge=queue[i];
+ v0=graph3->edges[curr_edge].vertices[0];
+ v1=graph3->edges[curr_edge].vertices[1];
+ v2=graph3->edges[curr_edge].vertices[2];
+ DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
+ if(!GETBIT(marked_vertices, v0)){
+ if(!GETBIT(marked_vertices,v1))
+ {
+ //SETVALUE(bdz_ph->g, v1, UNASSIGNED);
+ SETBIT(marked_vertices, v1);
+ }
+ if(!GETBIT(marked_vertices,v2))
+ {
+ //SETVALUE(bdz_ph->g, v2, UNASSIGNED);
+ SETBIT(marked_vertices, v2);
+ }
+ /* 6 is a multiple of 3: the three values then sum to 0 mod 3 (position of v0) */
+ SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3);
+ SETBIT(marked_vertices, v0);
+ } else if(!GETBIT(marked_vertices, v1)) {
+ if(!GETBIT(marked_vertices, v2))
+ {
+ //SETVALUE(bdz_ph->g, v2, UNASSIGNED);
+ SETBIT(marked_vertices, v2);
+ }
+ /* 7 = 6 + 1: the sum becomes 1 mod 3 (position of v1) */
+ SETVALUE0(bdz_ph->g, v1, (7 - (GETVALUE(bdz_ph->g, v0)+GETVALUE(bdz_ph->g, v2)))%3);
+ SETBIT(marked_vertices, v1);
+ }else {
+ /* 8 = 6 + 2: the sum becomes 2 mod 3 (position of v2) */
+ SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3);
+ SETBIT(marked_vertices, v2);
+ }
+ DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
+ };
+ free(marked_vertices);
+}
+
+/*
+ * Repack g: the 2-bit value of every vertex is read as a base-3 digit and
+ * five consecutive digits are combined into one byte (digit k weighted by
+ * 3^k). The old array is freed and replaced by the packed one.
+ */
+static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph)
+{
+    const cmph_uint32 packed_len = (cmph_uint32)ceil(bdz_ph->n/5.0);
+    cmph_uint8 *packed = (cmph_uint8 *)calloc((size_t)packed_len, sizeof(cmph_uint8));
+    cmph_uint32 vertex;
+
+    for (vertex = 0; vertex < bdz_ph->n; vertex++)
+    {
+        const cmph_uint32 slot = vertex/5;
+        const cmph_uint8 digit = GETVALUE(bdz_ph->g, vertex);
+
+        packed[slot] = (cmph_uint8) (packed[slot] + digit*pow3_table[vertex%5U]);
+    }
+
+    free(bdz_ph->g);
+    bdz_ph->g = packed;
+}
+
+
+/*
+ * Write mphf to fd: the common header (via __cmph_dump), the hash state
+ * length and bytes, then n, m, r and the packed g array (ceil(n/5) bytes).
+ *
+ * \return 1 when every record was written, 0 as soon as a write to fd
+ *         comes up short.
+ */
+int bdz_ph_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    cmph_uint32 sizeg = 0;
+    int ok;
+    bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
+    __cmph_dump(mphf, fd);
+
+    hash_state_dump(data->hl, &buf, &buflen);
+    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+    ok = (fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) == 1);
+    /* fwrite() reports 0 items for a zero-sized record, so an empty state is not an error */
+    if (ok && buflen > 0)
+    {
+        ok = (fwrite(buf, (size_t)buflen, (size_t)1, fd) == 1);
+    }
+    free(buf);
+    if (!ok) return 0;
+
+    if (fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd) != 1) return 0;
+    if (fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd) != 1) return 0;
+    if (fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd) != 1) return 0;
+    sizeg = (cmph_uint32)ceil(data->n/5.0);
+    if (sizeg > 0 && fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd) != 1) return 0;
+
+    #ifdef DEBUG
+    {
+        cmph_uint32 i;
+        fprintf(stderr, "G: ");
+        for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i));
+        fprintf(stderr, "\n");
+    }
+    #endif
+    return 1;
+}
+
+/*
+ * Read the records written by bdz_ph_dump() from f into a newly allocated
+ * bdz_ph_data_t and attach it to mphf->data: hash state length and bytes,
+ * then n, m, r and the packed g array (ceil(n/5) bytes).
+ *
+ * NOTE(review): no fread or allocation result is checked; on a short or
+ * failed read buflen and the struct fields are used uninitialized.
+ */
+void bdz_ph_load(FILE *f, cmph_t *mphf)
+{
+ char *buf = NULL;
+ cmph_uint32 buflen;
+ cmph_uint32 sizeg = 0;
+ /* nbytes receives every fread result but is never read */
+ register size_t nbytes;
+ bdz_ph_data_t *bdz_ph = (bdz_ph_data_t *)malloc(sizeof(bdz_ph_data_t));
+
+ DEBUGP("Loading bdz_ph mphf\n");
+ mphf->data = bdz_ph;
+
+ nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+ DEBUGP("Hash state has %u bytes\n", buflen);
+ buf = (char *)malloc((size_t)buflen);
+ nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+ bdz_ph->hl = hash_state_load(buf, buflen);
+ free(buf);
+
+
+ DEBUGP("Reading m and n\n");
+ nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);
+ /* five base-3 digits per byte, same size computation as bdz_ph_dump */
+ sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
+ bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
+ nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
+
+ return;
+}
+
+
+/*
+ * Evaluate the function for key: hash it to one vertex in each third of
+ * the vertex range, decode the base-3 digit stored for each vertex
+ * (five digits per byte of g) and return the vertex selected by the digit
+ * sum modulo 3.
+ */
+cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    bdz_ph_data_t *bdz_ph = mphf->data;
+    cmph_uint32 hl[3];
+    cmph_uint32 digit_sum = 0;
+    cmph_uint32 k;
+
+    hash_vector(bdz_ph->hl, key, keylen,hl);
+    hl[0] = hl[0] % bdz_ph->r;
+    hl[1] = hl[1] % bdz_ph->r + bdz_ph->r;
+    hl[2] = hl[2] % bdz_ph->r + (bdz_ph->r << 1);
+
+    for (k = 0; k < 3; k++)
+    {
+        const cmph_uint8 packed = bdz_ph->g[hl[k]/5];
+
+        digit_sum += lookup_table[hl[k]%5U][packed];
+    }
+
+    return hl[digit_sum%3];
+}
+
+
+/* Release everything owned by mphf: the g array, the hash state, the bdz_ph data and mphf itself. */
+void bdz_ph_destroy(cmph_t *mphf)
+{
+    bdz_ph_data_t *owned = (bdz_ph_data_t *)mphf->data;
+
+    free(owned->g);
+    hash_state_destroy(owned->hl);
+
+    free(owned);
+    free(mphf);
+}
+
+/** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
+ *  \brief Serialize the function into a caller-provided contiguous buffer.
+ *  \param mphf pointer to the mphf to pack
+ *  \param packed_mphf destination buffer; it must hold at least the number of bytes reported by bdz_ph_packed_size()
+ *
+ * Layout written here: hash type (32 bits), packed hash state, r (32 bits),
+ * then the g array (ceil(n/5) bytes).
+ */
+void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
+{
+    bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
+    const CMPH_HASH hl_type = hash_get_type(data->hl);
+    const cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
+    cmph_uint8 *cursor = packed_mphf;
+
+    /* hash type */
+    *((cmph_uint32 *) cursor) = hl_type;
+    cursor += sizeof(cmph_uint32);
+
+    /* hash state */
+    hash_state_pack(data->hl, cursor);
+    cursor += hash_state_packed_size(hl_type);
+
+    /* r */
+    *((cmph_uint32 *) cursor) = data->r;
+    cursor += sizeof(data->r);
+
+    /* g */
+    memcpy(cursor, data->g, sizeof(cmph_uint8)*sizeg);
+}
+
+/** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
+ *  \brief Compute the number of bytes needed to pack mphf.
+ *  \param mphf pointer to a mphf
+ *  \return sizeof(CMPH_ALGO) plus the packed hash state size, two 32-bit words and the g array (ceil(n/5) bytes)
+ */
+cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
+{
+    bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
+    const CMPH_HASH hl_type = hash_get_type(data->hl);
+    const cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
+
+    return (cmph_uint32) (sizeof(CMPH_ALGO)
+                          + hash_state_packed_size(hl_type)
+                          + 2*sizeof(cmph_uint32)
+                          + sizeof(cmph_uint8)*sizeg);
+}
+
+/** \fn cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ *  \brief Evaluate the function directly on its packed representation.
+ *  \param packed_mphf pointer to the packed mphf (layout as written by bdz_ph_pack)
+ *  \param key key to be hashed
+ *  \param keylen key length in bytes
+ *  \return The mphf value
+ */
+cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    /* header: hash type, then the packed hash state, then r, then g */
+    const CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
+    cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
+    cmph_uint8 *after_hash = hl_ptr + hash_state_packed_size(hl_type);
+    const cmph_uint32 r = *((cmph_uint32*) after_hash);
+    cmph_uint8 *g = after_hash + 4;
+
+    cmph_uint32 hl[3];
+    cmph_uint32 digit_sum = 0;
+    cmph_uint32 k;
+
+    hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
+
+    hl[0] = hl[0] % r;
+    hl[1] = hl[1] % r + r;
+    hl[2] = hl[2] % r + (r << 1);
+
+    for (k = 0; k < 3; k++)
+    {
+        const cmph_uint8 packed = g[hl[k]/5];
+
+        digit_sum += lookup_table[hl[k]%5][packed];
+    }
+
+    return hl[digit_sum%3];
+}
diff --git a/girepository/cmph/bdz_ph.h b/girepository/cmph/bdz_ph.h
new file mode 100755
index 00000000..73cce2ed
--- /dev/null
+++ b/girepository/cmph/bdz_ph.h
@@ -0,0 +1,42 @@
+#ifndef __CMPH_BDZ_PH_H__
+#define __CMPH_BDZ_PH_H__
+
+#include "cmph.h"
+
+typedef struct __bdz_ph_data_t bdz_ph_data_t;
+typedef struct __bdz_ph_config_data_t bdz_ph_config_data_t;
+
+bdz_ph_config_data_t *bdz_ph_config_new();
+void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void bdz_ph_config_destroy(cmph_config_t *mph);
+cmph_t *bdz_ph_new(cmph_config_t *mph, double c);
+
+void bdz_ph_load(FILE *f, cmph_t *mphf);
+int bdz_ph_dump(cmph_t *mphf, FILE *f);
+void bdz_ph_destroy(cmph_t *mphf);
+cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/bdz_structs.h b/girepository/cmph/bdz_structs.h
new file mode 100755
index 00000000..ba7dc3c6
--- /dev/null
+++ b/girepository/cmph/bdz_structs.h
@@ -0,0 +1,36 @@
+#ifndef __CMPH_BDZ_STRUCTS_H__
+#define __CMPH_BDZ_STRUCTS_H__
+
+#include "hash_state.h"
+
+/* Data behind the bdz_data_t handle; same fields as __bdz_config_data_t except for hashfunc. */
+struct __bdz_data_t
+{
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ cmph_uint32 r; //partition vertex count
+ cmph_uint8 *g;
+ hash_state_t *hl; // linear hashing
+
+ cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
+ cmph_uint8 b; // number of bits of k
+ cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$
+ cmph_uint32 *ranktable; // rank table
+};
+
+
+/* Configuration-side counterpart of __bdz_data_t: the same fields plus the selected hash function. */
+struct __bdz_config_data_t
+{
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ cmph_uint32 r; //partition vertex count
+ cmph_uint8 *g;
+ hash_state_t *hl; // linear hashing
+
+ cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
+ cmph_uint8 b; // number of bits of k
+ cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$
+ cmph_uint32 *ranktable; // rank table
+ CMPH_HASH hashfunc; // hash function identifier
+};
+
+#endif
diff --git a/girepository/cmph/bdz_structs_ph.h b/girepository/cmph/bdz_structs_ph.h
new file mode 100755
index 00000000..5874a26d
--- /dev/null
+++ b/girepository/cmph/bdz_structs_ph.h
@@ -0,0 +1,26 @@
+#ifndef __CMPH_BDZ_STRUCTS_PH_H__
+#define __CMPH_BDZ_STRUCTS_PH_H__
+
+#include "hash_state.h"
+
+/* Data of a built bdz_ph function, as filled in by bdz_ph_new() and bdz_ph_load(). */
+struct __bdz_ph_data_t
+{
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ cmph_uint32 r; //partition vertex count
+ cmph_uint8 *g; // per-vertex values, five base-3 digits per byte (ceil(n/5) bytes)
+ hash_state_t *hl; // linear hashing
+};
+
+
+/* Working state used while building a bdz_ph function; g and hl are handed over to the result by bdz_ph_new(). */
+struct __bdz_ph_config_data_t
+{
+ CMPH_HASH hashfunc; // hash function used to create hl (CMPH_HASH_JENKINS by default)
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ cmph_uint32 r; //partition vertex count
+ cmph_uint8 *g; // per-vertex values: 2 bits each after assigning, repacked to base 3 by the optimization step
+ hash_state_t *hl; // linear hashing
+};
+
+#endif
diff --git a/girepository/cmph/bitbool.h b/girepository/cmph/bitbool.h
new file mode 100644
index 00000000..a3286c3c
--- /dev/null
+++ b/girepository/cmph/bitbool.h
@@ -0,0 +1,179 @@
+#ifndef _CMPH_BITBOOL_H__
+#define _CMPH_BITBOOL_H__
+#include "cmph_types.h"
+
+/* bitmask[k] has only bit k set, k in 0..7 (used by GETBIT, SETBIT, UNSETBIT). */
+static const cmph_uint8 bitmask[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+
+/* bitmask32[k] has only bit k set, k in 0..31 (used by the *BIT32 macros). */
+static const cmph_uint32 bitmask32[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7,
+ 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15,
+ 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 21, 1 << 22, 1 << 23,
+ 1 << 24, 1 << 25, 1 << 26, 1 << 27, 1 << 28, 1 << 29, 1 << 30, 1U << 31
+ };
+
+/* valuemask[k] has every bit set except the two bits of 2-bit field k (used by SETVALUE1). */
+static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
+
+
+/** \def GETBIT(array, i)
+ *  \brief get the value of a 1-bit integer stored in an array.
+ *  \param array to get 1-bit integer values from
+ *  \param i is the index in array to get the 1-bit integer value from
+ *
+ * GETBIT(array, i) is a macro that gets the value (0 or 1) of a 1-bit integer stored in array.
+ * Both arguments are parenthesized in the expansion, so any expression may be passed;
+ * i is evaluated more than once and must not have side effects.
+ */
+#define GETBIT(array, i) (((array)[(i) >> 3] & bitmask[(i) & 0x00000007]) >> ((i) & 0x00000007))
+
+/** \def SETBIT(array, i)
+ *  \brief set 1 to a 1-bit integer stored in an array.
+ *  \param array to store 1-bit integer values
+ *  \param i is the index in array to set the bit to 1
+ *
+ * SETBIT(array, i) is a macro that sets 1 to a 1-bit integer stored in an array.
+ * Both arguments are parenthesized in the expansion, so any expression may be passed;
+ * i is evaluated twice and must not have side effects.
+ */
+#define SETBIT(array, i) ((array)[(i) >> 3] |= bitmask[(i) & 0x00000007])
+
+/** \def UNSETBIT(array, i)
+ *  \brief set 0 to a 1-bit integer stored in an array.
+ *  \param array to store 1-bit integer values
+ *  \param i is the index in array to set the bit to 0
+ *
+ * UNSETBIT(array, i) is a macro that sets 0 to a 1-bit integer stored in an array.
+ * NOTE(review): the bit is flipped with XOR, so it only ends up 0 when it was 1 before the call.
+ * Both arguments are parenthesized in the expansion, so any expression may be passed;
+ * i is evaluated twice and must not have side effects.
+ */
+#define UNSETBIT(array, i) ((array)[(i) >> 3] ^= ((bitmask[(i) & 0x00000007])))
+
+//#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8])
+//#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
+//#define UNSETBIT(array, i) (array[(i) / 8] ^= ((bitmask[(i) % 8])))
+
+
+/** \def SETVALUE1(array, i, v)
+ *  \brief set a value for a 2-bit integer stored in an array initialized with 1s.
+ *  \param array to store 2-bit integer values
+ *  \param i is the index in array to set the value v
+ *  \param v is the value to be set
+ *
+ * SETVALUE1(array, i, v) is a macro that sets a value for a 2-bit integer stored in an array.
+ * The array should be initialized with all bits set to 1. For example:
+ * memset(array, 0xff, arraySize);
+ * All arguments are parenthesized in the expansion; i is evaluated more than once.
+ */
+#define SETVALUE1(array, i, v) ((array)[(i) >> 2] &= (cmph_uint8)(((v) << (((i) & 0x00000003) << 1)) | valuemask[(i) & 0x00000003]))
+
+/** \def SETVALUE0(array, i, v)
+ *  \brief set a value for a 2-bit integer stored in an array initialized with 0s.
+ *  \param array to store 2-bit integer values
+ *  \param i is the index in array to set the value v
+ *  \param v is the value to be set
+ *
+ * SETVALUE0(array, i, v) is a macro that sets a value for a 2-bit integer stored in an array.
+ * The array should be initialized with all bits set to 0. For example:
+ * memset(array, 0, arraySize);
+ * All arguments are parenthesized in the expansion; i is evaluated twice.
+ */
+#define SETVALUE0(array, i, v) ((array)[(i) >> 2] |= (cmph_uint8)((v) << (((i) & 0x00000003) << 1)))
+
+
+/** \def GETVALUE(array, i)
+ *  \brief get a value for a 2-bit integer stored in an array.
+ *  \param array to get 2-bit integer values from
+ *  \param i is the index in array to get the value from
+ *
+ * GETVALUE(array, i) is a macro that gets the value of a 2-bit integer stored in an array.
+ * Both arguments are parenthesized in the expansion; i is evaluated twice.
+ */
+#define GETVALUE(array, i) ((cmph_uint8)(((array)[(i) >> 2] >> (((i) & 0x00000003U) << 1U)) & 0x00000003U))
+
+
+
+/** \def SETBIT32(array, i)
+ *  \brief set 1 to a 1-bit integer stored in an array of 32-bit words.
+ *  \param array to store 1-bit integer values. The entries are 32-bit words.
+ *  \param i is the index in array to set the bit to 1
+ *
+ * SETBIT32(array, i) is a macro that sets 1 to a 1-bit integer stored in an array of 32-bit words.
+ * Both arguments are parenthesized in the expansion; i is evaluated twice.
+ */
+#define SETBIT32(array, i) ((array)[(i) >> 5] |= bitmask32[(i) & 0x0000001f])
+
+/** \def GETBIT32(array, i)
+ *  \brief get the value of a 1-bit integer stored in an array of 32-bit words.
+ *  \param array to get 1-bit integer values from. The entries are 32-bit words.
+ *  \param i is the index in array to get the 1-bit integer value from
+ *
+ * GETBIT32(array, i) is a macro that tests a 1-bit integer stored in an array of 32-bit words.
+ * The result is the masked word: zero when the bit is clear, non-zero (not necessarily 1) when it is set.
+ * Both arguments are parenthesized in the expansion; i is evaluated twice.
+ */
+#define GETBIT32(array, i) ((array)[(i) >> 5] & bitmask32[(i) & 0x0000001f])
+
+/** \def UNSETBIT32(array, i)
+ *  \brief set 0 to a 1-bit integer stored in an array of 32-bit words.
+ *  \param array to store 1-bit integer values. The entries are 32-bit words
+ *  \param i is the index in array to set the bit to 0
+ *
+ * UNSETBIT32(array, i) is a macro that sets 0 to a 1-bit integer stored in an array of 32-bit words.
+ * NOTE(review): the bit is flipped with XOR, so it only ends up 0 when it was 1 before the call.
+ * Both arguments are parenthesized in the expansion; i is evaluated twice.
+ */
+#define UNSETBIT32(array, i) ((array)[(i) >> 5] ^= ((bitmask32[(i) & 0x0000001f])))
+
+/* Number of 32-bit words needed to hold n entries of bits_length bits each (rounded up); arguments are parenthesized so expressions may be passed. */
+#define BITS_TABLE_SIZE(n, bits_length) (((n) * (bits_length) + 31) >> 5)
+
+/*
+ * Store bits_string as entry number index of a table of fixed-width entries
+ * (string_length bits each, string_mask covering that width) packed into
+ * 32-bit words. An entry that crosses a word boundary is split over
+ * two consecutive words.
+ */
+static inline void set_bits_value(cmph_uint32 * bits_table, cmph_uint32 index, cmph_uint32 bits_string,
+                                  cmph_uint32 string_length, cmph_uint32 string_mask)
+{
+    const cmph_uint32 first_bit = index * string_length;
+    const cmph_uint32 word = first_bit >> 5;
+    const cmph_uint32 offset = first_bit & 0x0000001f;
+    const cmph_uint32 room = 32 - offset;    /* bits left in this word */
+
+    bits_table[word] = (bits_table[word] & ~(string_mask << offset)) | (bits_string << offset);
+
+    if (room < string_length)
+    {
+        bits_table[word+1] = (bits_table[word+1] & ~(string_mask >> room)) | (bits_string >> room);
+    }
+}
+
+/*
+ * Read entry number index from a table of fixed-width entries
+ * (string_length bits each, string_mask covering that width) packed into
+ * 32-bit words, joining the two halves when the entry crosses a word boundary.
+ */
+static inline cmph_uint32 get_bits_value(cmph_uint32 * bits_table,cmph_uint32 index, cmph_uint32 string_length, cmph_uint32 string_mask)
+{
+    const cmph_uint32 first_bit = index * string_length;
+    const cmph_uint32 word = first_bit >> 5;
+    const cmph_uint32 offset = first_bit & 0x0000001f;
+    const cmph_uint32 room = 32 - offset;    /* bits left in this word */
+    cmph_uint32 value = (bits_table[word] >> offset) & string_mask;
+
+    if (room < string_length)
+    {
+        value |= (bits_table[word+1] << room) & string_mask;
+    }
+    return value;
+}
+
+/*
+ * Store the low string_length bits of bits_string starting at bit position
+ * pos of a table of 32-bit words, spilling into the next word when the
+ * field crosses a word boundary. The mask is (1U << string_length) - 1.
+ */
+static inline void set_bits_at_pos(cmph_uint32 * bits_table, cmph_uint32 pos, cmph_uint32 bits_string, cmph_uint32 string_length)
+{
+    const cmph_uint32 word = pos >> 5;
+    const cmph_uint32 offset = pos & 0x0000001f;
+    const cmph_uint32 room = 32 - offset;    /* bits left in this word */
+    const cmph_uint32 mask = (1U << string_length) - 1;
+
+    bits_table[word] = (bits_table[word] & ~(mask << offset)) | (bits_string << offset);
+
+    if (room < string_length)
+    {
+        bits_table[word+1] = (bits_table[word+1] & ~(mask >> room)) | (bits_string >> room);
+    }
+}
+
+/*
+ * Read string_length bits starting at bit position pos of a table of 32-bit
+ * words, joining the two halves when the field crosses a word boundary.
+ * The mask is (1U << string_length) - 1.
+ */
+static inline cmph_uint32 get_bits_at_pos(cmph_uint32 * bits_table,cmph_uint32 pos,cmph_uint32 string_length)
+{
+    const cmph_uint32 word = pos >> 5;
+    const cmph_uint32 offset = pos & 0x0000001f;
+    const cmph_uint32 room = 32 - offset;    /* bits left in this word */
+    const cmph_uint32 mask = (1U << string_length) - 1;
+    cmph_uint32 value = (bits_table[word] >> offset) & mask;
+
+    if (room < string_length)
+    {
+        value |= (bits_table[word+1] << room) & mask;
+    }
+    return value;
+}
+
+
+#endif
diff --git a/girepository/cmph/bmz.c b/girepository/cmph/bmz.c
new file mode 100644
index 00000000..51798a18
--- /dev/null
+++ b/girepository/cmph/bmz.c
@@ -0,0 +1,620 @@
+#include "graph.h"
+#include "bmz.h"
+#include "cmph_structs.h"
+#include "bmz_structs.h"
+#include "hash.h"
+#include "vqueue.h"
+#include "bitbool.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+static int bmz_gen_edges(cmph_config_t *mph);
+static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
+static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
+static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited);
+
+/**
+ * Allocate a zero-initialised BMZ configuration with both hash function
+ * slots set to CMPH_HASH_JENKINS.
+ * \return the new configuration, or NULL if the allocation fails
+ */
+bmz_config_data_t *bmz_config_new()
+{
+    bmz_config_data_t *bmz = (bmz_config_data_t *)calloc((size_t)1, sizeof(bmz_config_data_t));
+    /* An assert() alone disappears under NDEBUG; fail explicitly instead. */
+    if (bmz == NULL) return NULL;
+    bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
+    bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
+    bmz->g = NULL;
+    bmz->graph = NULL;
+    bmz->hashes = NULL;
+    return bmz;
+}
+
+/**
+ * Free the BMZ-specific configuration stored in mph->data.
+ * Only the structure itself is released; mph->data is not reset.
+ */
+void bmz_config_destroy(cmph_config_t *mph)
+{
+    bmz_config_data_t *config = (bmz_config_data_t *)mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(config);
+}
+
+/**
+ * Copy up to two hash function identifiers from hashfuncs, a list
+ * terminated by CMPH_HASH_COUNT, into the BMZ configuration.
+ * Entries beyond the second are ignored.
+ */
+void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    bmz_config_data_t *config = (bmz_config_data_t *)mph->data;
+    cmph_uint32 slot;
+
+    for (slot = 0; hashfuncs[slot] != CMPH_HASH_COUNT; ++slot)
+    {
+        if (slot >= 2) break; //bmz only uses two hash functions
+        config->hashfuncs[slot] = hashfuncs[slot];
+    }
+}
+
+/**
+ * Build a BMZ minimal perfect hash function for the keys of mph->key_source.
+ *
+ * The graph has m = nkeys edges and n = ceil(c * nkeys) vertices; c == 0
+ * selects the default 1.15. Each attempt draws two hash functions, builds
+ * the graph (up to 100 tries to obtain one without self loops or multiple
+ * edges), labels the critical vertices and then the remaining ones. If the
+ * critical-vertex labelling asks for a restart, the whole attempt is
+ * repeated, at most 20 times.
+ *
+ * \return a new cmph_t owning g and the hash states, or NULL on failure.
+ *         On failure every resource created here is released.
+ */
+cmph_t *bmz_new(cmph_config_t *mph, double c)
+{
+    cmph_t *mphf = NULL;
+    bmz_data_t *bmzf = NULL;
+    cmph_uint32 i;
+    cmph_uint32 iterations;
+    cmph_uint32 iterations_map = 20;
+    cmph_uint8 *used_edges = NULL;
+    cmph_uint8 restart_mapping = 0;
+    cmph_uint8 * visited = NULL;
+
+    bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
+    if (c == 0) c = 1.15; // validating restrictions over parameter c.
+    DEBUGP("c: %f\n", c);
+    bmz->m = mph->key_source->nkeys;
+    bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
+    DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
+    bmz->graph = graph_new(bmz->n, bmz->m);
+    DEBUGP("Created graph\n");
+
+    bmz->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
+    if (bmz->hashes == NULL) goto fail;
+    for(i = 0; i < 3; ++i) bmz->hashes[i] = NULL;
+
+    do
+    {
+        // Mapping step
+        cmph_uint32 biggest_g_value = 0;
+        cmph_uint32 biggest_edge_value = 1;
+        iterations = 100;
+        // A previous attempt's verdict must not leak into this one.
+        restart_mapping = 0;
+        if (mph->verbosity)
+        {
+            fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz->m, bmz->n);
+        }
+        while(1)
+        {
+            int ok;
+            // Release hash states left over from a restarted attempt.
+            for (i = 0; i < 2; ++i)
+            {
+                if (bmz->hashes[i] != NULL)
+                {
+                    hash_state_destroy(bmz->hashes[i]);
+                    bmz->hashes[i] = NULL;
+                }
+            }
+            DEBUGP("hash function 1\n");
+            bmz->hashes[0] = hash_state_new(bmz->hashfuncs[0], bmz->n);
+            DEBUGP("hash function 2\n");
+            bmz->hashes[1] = hash_state_new(bmz->hashfuncs[1], bmz->n);
+            DEBUGP("Generating edges\n");
+            ok = bmz_gen_edges(mph);
+            if (!ok)
+            {
+                --iterations;
+                hash_state_destroy(bmz->hashes[0]);
+                bmz->hashes[0] = NULL;
+                hash_state_destroy(bmz->hashes[1]);
+                bmz->hashes[1] = NULL;
+                DEBUGP("%u iterations remaining\n", iterations);
+                if (mph->verbosity)
+                {
+                    fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
+                }
+                if (iterations == 0) break;
+            }
+            else break;
+        }
+        if (iterations == 0) goto fail;
+        // Ordering step
+        if (mph->verbosity)
+        {
+            fprintf(stderr, "Starting ordering step\n");
+        }
+        graph_obtain_critical_nodes(bmz->graph);
+
+        // Searching step
+        if (mph->verbosity)
+        {
+            fprintf(stderr, "Starting Searching step.\n");
+            fprintf(stderr, "\tTraversing critical vertices.\n");
+        }
+        DEBUGP("Searching step\n");
+        // One bit per vertex / per edge value, zero-initialised.
+        visited = (cmph_uint8 *)calloc((size_t)bmz->n/8 + 1, (size_t)1);
+        used_edges = (cmph_uint8 *)calloc((size_t)bmz->m/8 + 1, (size_t)1);
+        free(bmz->g);
+        bmz->g = (cmph_uint32 *)calloc((size_t)bmz->n, sizeof(cmph_uint32));
+        if (visited == NULL || used_edges == NULL || bmz->g == NULL) goto fail;
+        for (i = 0; i < bmz->n; ++i) // critical nodes
+        {
+            if (graph_node_is_critical(bmz->graph, i) && (!GETBIT(visited,i)))
+            {
+                if(c > 1.14) restart_mapping = bmz_traverse_critical_nodes(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
+                else restart_mapping = bmz_traverse_critical_nodes_heuristic(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
+                if(restart_mapping) break;
+            }
+        }
+        if(!restart_mapping)
+        {
+            if (mph->verbosity)
+            {
+                fprintf(stderr, "\tTraversing non critical vertices.\n");
+            }
+            bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes
+        }
+        else
+        {
+            iterations_map--;
+            if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
+        }
+        free(used_edges);
+        used_edges = NULL;
+        free(visited);
+        visited = NULL;
+    }while(restart_mapping && iterations_map > 0);
+    graph_destroy(bmz->graph);
+    bmz->graph = NULL;
+    if (iterations_map == 0) goto fail;
+
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    if (mphf == NULL) goto fail;
+    bmzf = (bmz_data_t *)malloc(sizeof(bmz_data_t));
+    if (bmzf == NULL) goto fail;
+    mphf->algo = mph->algo;
+    bmzf->g = bmz->g;
+    bmz->g = NULL; //transfer memory ownership
+    bmzf->hashes = bmz->hashes;
+    bmz->hashes = NULL; //transfer memory ownership
+    bmzf->n = bmz->n;
+    bmzf->m = bmz->m;
+    mphf->data = bmzf;
+    mphf->size = bmz->m;
+
+    DEBUGP("Successfully generated minimal perfect hash\n");
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+    }
+    return mphf;
+
+fail:
+    // Release everything this call created so the configuration holds no stale pointers.
+    free(used_edges);
+    free(visited);
+    if (bmz->graph != NULL)
+    {
+        graph_destroy(bmz->graph);
+        bmz->graph = NULL;
+    }
+    if (bmz->hashes != NULL)
+    {
+        for (i = 0; i < 2; ++i)
+        {
+            if (bmz->hashes[i] != NULL) hash_state_destroy(bmz->hashes[i]);
+        }
+        free(bmz->hashes);
+        bmz->hashes = NULL;
+    }
+    free(bmz->g);
+    bmz->g = NULL;
+    free(bmzf);
+    free(mphf);
+    return NULL;
+}
+
+/**
+ * Label the critical vertices reachable from v, breadth first.
+ *
+ * Each newly reached critical vertex u receives the smallest fresh label
+ * next_g (one above *biggest_g_value) such that next_g + g[lav] is not yet
+ * marked in used_edges for every already visited critical neighbour lav.
+ * The resulting sums are then marked in used_edges.
+ *
+ * \return 0 on success, 1 if some sum would reach bmz->m, in which case
+ *         the caller has to restart the mapping step
+ */
+static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
+{
+    cmph_uint32 next_g; /* always assigned in the lookahead loop before it is read */
+    cmph_uint32 u; /* Auxiliary vertex */
+    cmph_uint32 lav; /* lookahead vertex */
+    cmph_uint8 collision;
+    vqueue_t * q = vqueue_new((cmph_uint32)(graph_ncritical_nodes(bmz->graph)) + 1);
+    graph_iterator_t it, it1;
+
+    DEBUGP("Labelling critical vertices\n");
+    bmz->g[v] = (cmph_uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
+    SETBIT(visited, v);
+    vqueue_insert(q, v);
+    while(!vqueue_is_empty(q))
+    {
+        v = vqueue_remove(q);
+        it = graph_neighbors_it(bmz->graph, v);
+        while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
+        {
+            if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
+            {
+                collision = 1;
+                while(collision) // lookahead to resolve collisions
+                {
+                    next_g = *biggest_g_value + 1;
+                    it1 = graph_neighbors_it(bmz->graph, u);
+                    collision = 0;
+                    while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                    {
+                        if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav))
+                        {
+                            if(next_g + bmz->g[lav] >= bmz->m)
+                            {
+                                vqueue_destroy(q);
+                                return 1; // restart mapping step.
+                            }
+                            if (GETBIT(used_edges, (next_g + bmz->g[lav])))
+                            {
+                                collision = 1;
+                                break;
+                            }
+                        }
+                    }
+                    // Recording the candidate makes the next attempt try next_g + 1.
+                    if (next_g > *biggest_g_value) *biggest_g_value = next_g;
+                }
+                // Marking used edges...
+                it1 = graph_neighbors_it(bmz->graph, u);
+                while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                {
+                    if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
+                    {
+                        SETBIT(used_edges,(next_g + bmz->g[lav]));
+                        if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav];
+                    }
+                }
+                bmz->g[u] = next_g; // Labelling vertex u.
+                SETBIT(visited,u);
+                vqueue_insert(q, u);
+            }
+        }
+
+    }
+    vqueue_destroy(q);
+    return 0;
+}
+
+/**
+ * Variant of bmz_traverse_critical_nodes that recycles rejected labels.
+ *
+ * A fresh label that collides for one vertex is kept in unused_g_values
+ * and offered to later vertices before any new label is generated.
+ *
+ * \return 0 on success, 1 if the mapping step has to be restarted (an edge
+ *         value would reach bmz->m, or the label list could not be grown)
+ */
+static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
+{
+    cmph_uint32 next_g; /* always assigned in the lookahead loop before it is read */
+    cmph_uint32 u; /* Auxiliary vertex */
+    cmph_uint32 lav; /* lookahead vertex */
+    cmph_uint8 collision;
+    cmph_uint32 * unused_g_values = NULL;
+    cmph_uint32 unused_g_values_capacity = 0;
+    cmph_uint32 nunused_g_values = 0;
+    vqueue_t * q = vqueue_new((cmph_uint32)(0.5*graph_ncritical_nodes(bmz->graph))+1);
+    graph_iterator_t it, it1;
+
+    DEBUGP("Labelling critical vertices\n");
+    bmz->g[v] = (cmph_uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
+    SETBIT(visited, v);
+    vqueue_insert(q, v);
+    while(!vqueue_is_empty(q))
+    {
+        v = vqueue_remove(q);
+        it = graph_neighbors_it(bmz->graph, v);
+        while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
+        {
+            if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
+            {
+                cmph_uint32 next_g_index = 0;
+                collision = 1;
+                while(collision) // lookahead to resolve collisions
+                {
+                    if (next_g_index < nunused_g_values)
+                    {
+                        next_g = unused_g_values[next_g_index++];
+                    }
+                    else
+                    {
+                        next_g = *biggest_g_value + 1;
+                        // Sentinel: the label did not come from unused_g_values.
+                        next_g_index = UINT_MAX;
+                    }
+                    it1 = graph_neighbors_it(bmz->graph, u);
+                    collision = 0;
+                    while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                    {
+                        if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav))
+                        {
+                            if(next_g + bmz->g[lav] >= bmz->m)
+                            {
+                                vqueue_destroy(q);
+                                free(unused_g_values);
+                                return 1; // restart mapping step.
+                            }
+                            if (GETBIT(used_edges, (next_g + bmz->g[lav])))
+                            {
+                                collision = 1;
+                                break;
+                            }
+                        }
+                    }
+                    if(collision && (next_g > *biggest_g_value)) // saving the current g value stored in next_g.
+                    {
+                        if(nunused_g_values == unused_g_values_capacity)
+                        {
+                            // Keep the old pointer until realloc succeeds so it can still be freed.
+                            cmph_uint32 * grown = (cmph_uint32 *)realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32));
+                            if (grown == NULL)
+                            {
+                                vqueue_destroy(q);
+                                free(unused_g_values);
+                                return 1; // out of memory: give up on this mapping.
+                            }
+                            unused_g_values = grown;
+                            unused_g_values_capacity += BUFSIZ;
+                        }
+                        unused_g_values[nunused_g_values++] = next_g;
+
+                    }
+                    if (next_g > *biggest_g_value) *biggest_g_value = next_g;
+                }
+                // If the accepted label came from the list, drop it by moving the last entry into its slot.
+                next_g_index--;
+                if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
+
+                // Marking used edges...
+                it1 = graph_neighbors_it(bmz->graph, u);
+                while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                {
+                    if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
+                    {
+                        SETBIT(used_edges,(next_g + bmz->g[lav]));
+                        if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav];
+                    }
+                }
+                bmz->g[u] = next_g; // Labelling vertex u.
+                SETBIT(visited, u);
+                vqueue_insert(q, u);
+            }
+        }
+
+    }
+    vqueue_destroy(q);
+    free(unused_g_values);
+    return 0;
+}
+
+/**
+ * Return the first index at or after unused_edge_index whose bit is clear
+ * in used_edges. The index is asserted to stay below bmz->m.
+ */
+static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
+{
+    for (;;)
+    {
+        assert(unused_edge_index < bmz->m);
+        if (!GETBIT(used_edges, unused_edge_index)) return unused_edge_index;
+        ++unused_edge_index;
+    }
+}
+
+/**
+ * Depth-first labelling starting from the already labelled vertex v.
+ *
+ * Every unvisited neighbour takes the next free edge value e (tracked
+ * through *unused_edge_index) and is labelled g[neighbour] = e - g[v],
+ * then the walk recurses into it.
+ */
+static void bmz_traverse(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 v, cmph_uint32 * unused_edge_index, cmph_uint8 * visited)
+{
+    graph_iterator_t cursor = graph_neighbors_it(bmz->graph, v);
+    cmph_uint32 w;
+
+    for (w = graph_next_neighbor(bmz->graph, &cursor);
+         w != GRAPH_NO_NEIGHBOR;
+         w = graph_next_neighbor(bmz->graph, &cursor))
+    {
+        if (!GETBIT(visited, w))
+        {
+            *unused_edge_index = next_unused_edge(bmz, used_edges, *unused_edge_index);
+            // Unsigned subtraction: the label may wrap, the sum g[v] + g[w] still equals the edge value.
+            bmz->g[w] = *unused_edge_index - bmz->g[v];
+            SETBIT(visited, w);
+            (*unused_edge_index)++;
+            bmz_traverse(bmz, used_edges, w, unused_edge_index, visited);
+        }
+    }
+}
+
+/**
+ * Label every vertex not handled by the critical-vertex pass.
+ *
+ * First, for each edge with exactly one visited endpoint, the labelling
+ * walks outwards from that endpoint. Afterwards any vertex still
+ * unvisited is labelled 0 and used as the root of a further walk.
+ */
+static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited)
+{
+    cmph_uint32 edge;
+    cmph_uint32 vertex;
+    cmph_uint32 unused_edge_index = 0;
+
+    DEBUGP("Labelling non critical vertices\n");
+    for (edge = 0; edge < bmz->m; ++edge)
+    {
+        cmph_uint32 a = graph_vertex_id(bmz->graph, edge, 0);
+        cmph_uint32 b = graph_vertex_id(bmz->graph, edge, 1);
+        int a_unseen = !GETBIT(visited, a);
+        int b_unseen = !GETBIT(visited, b);
+
+        // Only edges that join a labelled vertex to an unlabelled one start a walk.
+        if (a_unseen == b_unseen) continue;
+        bmz_traverse(bmz, used_edges, a_unseen ? b : a, &unused_edge_index, visited);
+    }
+
+    for (vertex = 0; vertex < bmz->n; ++vertex)
+    {
+        if (GETBIT(visited, vertex)) continue;
+        bmz->g[vertex] = 0;
+        SETBIT(visited, vertex);
+        bmz_traverse(bmz, used_edges, vertex, &unused_edge_index, visited);
+    }
+}
+
+/**
+ * Rebuild the graph from the key source: every key adds the edge
+ * (h1 % n, h2 % n). When both hashes agree, h2 moves to the next vertex,
+ * wrapping to 0 at n.
+ *
+ * \return 1 when all keys were added, 0 as soon as a self loop or an
+ *         already present edge is found
+ */
+static int bmz_gen_edges(cmph_config_t *mph)
+{
+    bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
+    cmph_uint32 e;
+
+    DEBUGP("Generating edges for %u vertices\n", bmz->n);
+    graph_clear_edges(bmz->graph);
+    mph->key_source->rewind(mph->key_source->data);
+    for (e = 0; e < mph->key_source->nkeys; ++e)
+    {
+        cmph_uint32 h1, h2;
+        cmph_uint32 keylen;
+        char *key = NULL;
+        int self_loop;
+
+        mph->key_source->read(mph->key_source->data, &key, &keylen);
+        h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
+        h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
+        if (h1 == h2 && ++h2 >= bmz->n) h2 = 0;
+
+        self_loop = (h1 == h2);
+        if (self_loop && mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
+        // The key is handed back to the source on every path.
+        mph->key_source->dispose(mph->key_source->data, key, keylen);
+        if (self_loop) return 0;
+
+        if (graph_contains_edge(bmz->graph, h1, h2))
+        {
+            // checking multiple edge restriction.
+            if (mph->verbosity) fprintf(stderr, "A non simple graph was generated\n");
+            return 0;
+        }
+        graph_add_edge(bmz->graph, h1, h2);
+    }
+    // Reaching this point means no duplicate edge was seen.
+    return 1;
+}
+
+/* Write exactly nbytes from ptr to fd; returns 1 on success, 0 on a short write. */
+static int bmz_write_bytes(const void *ptr, size_t nbytes, FILE *fd)
+{
+    if (nbytes == 0) return 1;
+    return fwrite(ptr, (size_t)1, nbytes, fd) == nbytes;
+}
+
+/**
+ * Serialise mphf to fd.
+ *
+ * After the common header emitted by __cmph_dump, the layout is: the
+ * number of hash functions (2), each hash state as a length followed by
+ * its bytes, then n, m and the n entries of g.
+ *
+ * \return 1 on success, 0 if any write to fd comes up short
+ */
+int bmz_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    cmph_uint32 two = 2; //number of hash functions
+    bmz_data_t *data = (bmz_data_t *)mphf->data;
+    cmph_uint32 h;
+    int ok;
+    __cmph_dump(mphf, fd);
+
+    if (!bmz_write_bytes(&two, sizeof(cmph_uint32), fd)) return 0;
+
+    for (h = 0; h < two; ++h)
+    {
+        hash_state_dump(data->hashes[h], &buf, &buflen);
+        DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+        ok = bmz_write_bytes(&buflen, sizeof(cmph_uint32), fd)
+          && bmz_write_bytes(buf, (size_t)buflen, fd);
+        // The dumped state buffer is released whether or not the write worked.
+        free(buf);
+        buf = NULL;
+        if (!ok) return 0;
+    }
+
+    if (!bmz_write_bytes(&(data->n), sizeof(cmph_uint32), fd)) return 0;
+    if (!bmz_write_bytes(&(data->m), sizeof(cmph_uint32), fd)) return 0;
+
+    if (!bmz_write_bytes(data->g, sizeof(cmph_uint32)*(data->n), fd)) return 0;
+    #ifdef DEBUG
+    {
+        cmph_uint32 i;
+        fprintf(stderr, "G: ");
+        for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
+        fprintf(stderr, "\n");
+    }
+    #endif
+    return 1;
+}
+
+/**
+ * Read a BMZ function from f into a freshly allocated bmz_data_t stored
+ * in mphf->data.
+ *
+ * Expected layout: hash count, each hash state as a length followed by
+ * its bytes, then n, m and the n entries of g. The hashes array gets one
+ * extra trailing NULL slot.
+ *
+ * NOTE(review): the function returns void, and neither the fread results
+ * nor the allocations are checked, so a truncated file goes unnoticed.
+ */
+void bmz_load(FILE *f, cmph_t *mphf)
+{
+    bmz_data_t *loaded = (bmz_data_t *)malloc(sizeof(bmz_data_t));
+    cmph_uint32 nhashes;
+    cmph_uint32 idx;
+    register size_t nbytes;
+
+    DEBUGP("Loading bmz mphf\n");
+    mphf->data = loaded;
+    nbytes = fread(&nhashes, sizeof(cmph_uint32), (size_t)1, f);
+    loaded->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
+    loaded->hashes[nhashes] = NULL;
+    DEBUGP("Reading %u hashes\n", nhashes);
+    for (idx = 0; idx < nhashes; ++idx)
+    {
+        cmph_uint32 state_len;
+        char *state_buf;
+
+        nbytes = fread(&state_len, sizeof(cmph_uint32), (size_t)1, f);
+        DEBUGP("Hash state has %u bytes\n", state_len);
+        state_buf = (char *)malloc((size_t)state_len);
+        nbytes = fread(state_buf, (size_t)state_len, (size_t)1, f);
+        loaded->hashes[idx] = hash_state_load(state_buf, state_len);
+        // The serialised form is only needed while the state is rebuilt.
+        free(state_buf);
+    }
+
+    DEBUGP("Reading m and n\n");
+    nbytes = fread(&(loaded->n), sizeof(cmph_uint32), (size_t)1, f);
+    nbytes = fread(&(loaded->m), sizeof(cmph_uint32), (size_t)1, f);
+
+    loaded->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*loaded->n);
+    nbytes = fread(loaded->g, loaded->n*sizeof(cmph_uint32), (size_t)1, f);
+    #ifdef DEBUG
+    fprintf(stderr, "G: ");
+    for (idx = 0; idx < loaded->n; ++idx) fprintf(stderr, "%u ", loaded->g[idx]);
+    fprintf(stderr, "\n");
+    #endif
+}
+
+
+/**
+ * Evaluate the minimal perfect hash for key: g[h1] + g[h2], where h1 and
+ * h2 are the two key hashes reduced modulo n.
+ *
+ * When both hashes agree, h2 moves to the next vertex and wraps to 0 at
+ * n, exactly as bmz_gen_edges does while building the graph.
+ */
+cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    bmz_data_t *bmz = mphf->data;
+    cmph_uint32 h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
+    cmph_uint32 h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
+    DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
+    // ">=" (not ">"): h2 == n is already outside g and must wrap to 0.
+    if (h1 == h2 && ++h2 >= bmz->n) h2 = 0;
+    DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
+    return bmz->g[h1] + bmz->g[h2];
+}
+/**
+ * Release a BMZ function: both hash states, the hashes array, the g
+ * table, the bmz_data_t itself and finally mphf.
+ */
+void bmz_destroy(cmph_t *mphf)
+{
+    bmz_data_t *bmz = (bmz_data_t *)mphf->data;
+    cmph_uint32 k;
+
+    free(bmz->g);
+    for (k = 0; k < 2; ++k)
+    {
+        hash_state_destroy(bmz->hashes[k]);
+    }
+    free(bmz->hashes);
+    free(bmz);
+    free(mphf);
+}
+
+/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Pack a perfect hash function into a preallocated contiguous memory area.
+ *
+ * Layout written: for each of the two hash functions its type (32 bits)
+ * followed by its packed state, then n (32 bits), then the n entries of g.
+ *
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void bmz_pack(cmph_t *mphf, void *packed_mphf)
+{
+    bmz_data_t *bmz = (bmz_data_t *)mphf->data;
+    cmph_uint8 *cursor = packed_mphf;
+    cmph_uint32 k;
+
+    // packing h1 and h2: type word first, packed state right behind it
+    for (k = 0; k < 2; ++k)
+    {
+        CMPH_HASH type = hash_get_type(bmz->hashes[k]);
+        *((cmph_uint32 *) cursor) = type;
+        cursor += sizeof(cmph_uint32);
+
+        hash_state_pack(bmz->hashes[k], cursor);
+        cursor += hash_state_packed_size(type);
+    }
+
+    // packing n
+    *((cmph_uint32 *) cursor) = bmz->n;
+    cursor += sizeof(bmz->n);
+
+    // packing g
+    memcpy(cursor, bmz->g, sizeof(cmph_uint32)*bmz->n);
+}
+
+/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ *
+ * The total covers one CMPH_ALGO, both packed hash states, three 32-bit
+ * words and the n entries of g.
+ *
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 bmz_packed_size(cmph_t *mphf)
+{
+    bmz_data_t *bmz = (bmz_data_t *)mphf->data;
+    CMPH_HASH first_type = hash_get_type(bmz->hashes[0]);
+    CMPH_HASH second_type = hash_get_type(bmz->hashes[1]);
+    cmph_uint32 first_size = hash_state_packed_size(first_type);
+    cmph_uint32 second_size = hash_state_packed_size(second_type);
+
+    return (cmph_uint32)(sizeof(CMPH_ALGO) + first_size + second_size +
+            3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*bmz->n);
+}
+
+/** \fn cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ *
+ * Reads the layout produced by bmz_pack: type and packed state of each
+ * hash function, then n, then the g table.
+ *
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    register cmph_uint8 *h1_ptr = packed_mphf;
+    register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
+    h1_ptr += 4;
+
+    register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
+    register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
+    h2_ptr += 4;
+
+    register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
+
+    register cmph_uint32 n = *g_ptr++;
+
+    register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
+    register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
+    // ">=" (not ">"): h2 == n is already outside g and must wrap to 0, as in bmz_gen_edges.
+    if (h1 == h2 && ++h2 >= n) h2 = 0;
+    return (g_ptr[h1] + g_ptr[h2]);
+}
diff --git a/girepository/cmph/bmz.h b/girepository/cmph/bmz.h
new file mode 100644
index 00000000..ee5f61dd
--- /dev/null
+++ b/girepository/cmph/bmz.h
@@ -0,0 +1,42 @@
+#ifndef __CMPH_BMZ_H__
+#define __CMPH_BMZ_H__
+
+#include "cmph.h"
+
+typedef struct __bmz_data_t bmz_data_t;
+typedef struct __bmz_config_data_t bmz_config_data_t;
+
+bmz_config_data_t *bmz_config_new();
+void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void bmz_config_destroy(cmph_config_t *mph);
+cmph_t *bmz_new(cmph_config_t *mph, double c);
+
+void bmz_load(FILE *f, cmph_t *mphf);
+int bmz_dump(cmph_t *mphf, FILE *f);
+void bmz_destroy(cmph_t *mphf);
+cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void bmz_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 bmz_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/bmz8.c b/girepository/cmph/bmz8.c
new file mode 100644
index 00000000..203f4fc1
--- /dev/null
+++ b/girepository/cmph/bmz8.c
@@ -0,0 +1,632 @@
+#include "graph.h"
+#include "bmz8.h"
+#include "cmph_structs.h"
+#include "bmz8_structs.h"
+#include "hash.h"
+#include "vqueue.h"
+#include "bitbool.h"
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+static int bmz8_gen_edges(cmph_config_t *mph);
+static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
+static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
+static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited);
+
+/**
+ * Allocate a zero-initialised BMZ8 configuration with both hash function
+ * slots set to CMPH_HASH_JENKINS.
+ * \return the new configuration, or NULL if the allocation fails
+ */
+bmz8_config_data_t *bmz8_config_new()
+{
+    bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)calloc((size_t)1, sizeof(bmz8_config_data_t));
+    /* An assert() alone disappears under NDEBUG; fail explicitly instead. */
+    if (bmz8 == NULL) return NULL;
+    bmz8->hashfuncs[0] = CMPH_HASH_JENKINS;
+    bmz8->hashfuncs[1] = CMPH_HASH_JENKINS;
+    bmz8->g = NULL;
+    bmz8->graph = NULL;
+    bmz8->hashes = NULL;
+    return bmz8;
+}
+
+/**
+ * Free the BMZ8-specific configuration stored in mph->data.
+ * Only the structure itself is released; mph->data is not reset.
+ */
+void bmz8_config_destroy(cmph_config_t *mph)
+{
+    bmz8_config_data_t *config = (bmz8_config_data_t *)mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(config);
+}
+
+/**
+ * Copy up to two hash function identifiers from hashfuncs, a list
+ * terminated by CMPH_HASH_COUNT, into the BMZ8 configuration.
+ * Entries beyond the second are ignored.
+ */
+void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    bmz8_config_data_t *config = (bmz8_config_data_t *)mph->data;
+    cmph_uint8 slot;
+
+    for (slot = 0; hashfuncs[slot] != CMPH_HASH_COUNT; ++slot)
+    {
+        if (slot >= 2) break; //bmz8 only uses two hash functions
+        config->hashfuncs[slot] = hashfuncs[slot];
+    }
+}
+
+/**
+ * Build a BMZ8 minimal perfect hash function (8-bit labels) for the keys
+ * of mph->key_source.
+ *
+ * The graph has m = nkeys edges and n = ceil(c * nkeys) vertices; c == 0
+ * selects the default 1.15. Both m and n are stored in 8 bits, so the
+ * call is rejected unless nkeys < 256 and n < 256. Each attempt draws two
+ * hash functions, builds the graph (up to 100 tries), labels the critical
+ * vertices and then the remaining ones; a requested restart repeats the
+ * attempt, at most 20 times.
+ *
+ * \return a new cmph_t owning g and the hash states, or NULL on failure.
+ *         On failure every resource created here is released.
+ */
+cmph_t *bmz8_new(cmph_config_t *mph, double c)
+{
+    cmph_t *mphf = NULL;
+    bmz8_data_t *bmz8f = NULL;
+    cmph_uint8 i;
+    cmph_uint8 iterations;
+    cmph_uint8 iterations_map = 20;
+    cmph_uint8 *used_edges = NULL;
+    cmph_uint8 restart_mapping = 0;
+    cmph_uint8 * visited = NULL;
+    double nvertices;
+    bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
+
+    if (mph->key_source->nkeys >= 256)
+    {
+        if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
+        return NULL;
+    }
+    if (c == 0) c = 1.15; // validating restrictions over parameter c.
+    DEBUGP("c: %f\n", c);
+    // The vertex count is kept in 8 bits too; converting an out-of-range double would be undefined.
+    nvertices = ceil(c * mph->key_source->nkeys);
+    if (!(nvertices >= 0 && nvertices < 256))
+    {
+        if (mph->verbosity) fprintf(stderr, "The number of vertices in BMZ8 must be lower than 256.\n");
+        return NULL;
+    }
+    bmz8->m = (cmph_uint8) mph->key_source->nkeys;
+    bmz8->n = (cmph_uint8) nvertices;
+    DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
+    bmz8->graph = graph_new(bmz8->n, bmz8->m);
+    DEBUGP("Created graph\n");
+
+    bmz8->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
+    if (bmz8->hashes == NULL) goto fail;
+    for(i = 0; i < 3; ++i) bmz8->hashes[i] = NULL;
+
+    do
+    {
+        // Mapping step
+        cmph_uint8 biggest_g_value = 0;
+        cmph_uint8 biggest_edge_value = 1;
+        iterations = 100;
+        // A previous attempt's verdict must not leak into this one.
+        restart_mapping = 0;
+        if (mph->verbosity)
+        {
+            fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz8->m, bmz8->n);
+        }
+        while(1)
+        {
+            int ok;
+            // Release hash states left over from a restarted attempt.
+            for (i = 0; i < 2; ++i)
+            {
+                if (bmz8->hashes[i] != NULL)
+                {
+                    hash_state_destroy(bmz8->hashes[i]);
+                    bmz8->hashes[i] = NULL;
+                }
+            }
+            DEBUGP("hash function 1\n");
+            bmz8->hashes[0] = hash_state_new(bmz8->hashfuncs[0], bmz8->n);
+            DEBUGP("hash function 2\n");
+            bmz8->hashes[1] = hash_state_new(bmz8->hashfuncs[1], bmz8->n);
+            DEBUGP("Generating edges\n");
+            ok = bmz8_gen_edges(mph);
+            if (!ok)
+            {
+                --iterations;
+                hash_state_destroy(bmz8->hashes[0]);
+                bmz8->hashes[0] = NULL;
+                hash_state_destroy(bmz8->hashes[1]);
+                bmz8->hashes[1] = NULL;
+                DEBUGP("%u iterations remaining\n", iterations);
+                if (mph->verbosity)
+                {
+                    fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
+                }
+                if (iterations == 0) break;
+            }
+            else break;
+        }
+        if (iterations == 0) goto fail;
+
+        // Ordering step
+        if (mph->verbosity)
+        {
+            fprintf(stderr, "Starting ordering step\n");
+        }
+
+        graph_obtain_critical_nodes(bmz8->graph);
+
+        // Searching step
+        if (mph->verbosity)
+        {
+            fprintf(stderr, "Starting Searching step.\n");
+            fprintf(stderr, "\tTraversing critical vertices.\n");
+        }
+        DEBUGP("Searching step\n");
+        // One bit per vertex / per edge value, zero-initialised.
+        visited = (cmph_uint8 *)calloc((size_t)bmz8->n/8 + 1, (size_t)1);
+        used_edges = (cmph_uint8 *)calloc((size_t)bmz8->m/8 + 1, (size_t)1);
+        free(bmz8->g);
+        bmz8->g = (cmph_uint8 *)calloc((size_t)bmz8->n, sizeof(cmph_uint8));
+        if (visited == NULL || used_edges == NULL || bmz8->g == NULL) goto fail;
+        for (i = 0; i < bmz8->n; ++i) // critical nodes
+        {
+            if (graph_node_is_critical(bmz8->graph, i) && (!GETBIT(visited,i)))
+            {
+                if(c > 1.14) restart_mapping = bmz8_traverse_critical_nodes(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
+                else restart_mapping = bmz8_traverse_critical_nodes_heuristic(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
+                if(restart_mapping) break;
+            }
+        }
+        if(!restart_mapping)
+        {
+            if (mph->verbosity)
+            {
+                fprintf(stderr, "\tTraversing non critical vertices.\n");
+            }
+            bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
+        }
+        else
+        {
+            iterations_map--;
+            if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
+        }
+
+        free(used_edges);
+        used_edges = NULL;
+        free(visited);
+        visited = NULL;
+
+    }while(restart_mapping && iterations_map > 0);
+    graph_destroy(bmz8->graph);
+    bmz8->graph = NULL;
+    if (iterations_map == 0) goto fail;
+
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    if (mphf == NULL) goto fail;
+    bmz8f = (bmz8_data_t *)malloc(sizeof(bmz8_data_t));
+    if (bmz8f == NULL) goto fail;
+    mphf->algo = mph->algo;
+    bmz8f->g = bmz8->g;
+    bmz8->g = NULL; //transfer memory ownership
+    bmz8f->hashes = bmz8->hashes;
+    bmz8->hashes = NULL; //transfer memory ownership
+    bmz8f->n = bmz8->n;
+    bmz8f->m = bmz8->m;
+    mphf->data = bmz8f;
+    mphf->size = bmz8->m;
+    DEBUGP("Successfully generated minimal perfect hash\n");
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+    }
+    return mphf;
+
+fail:
+    // Release everything this call created so the configuration holds no stale pointers.
+    free(used_edges);
+    free(visited);
+    if (bmz8->graph != NULL)
+    {
+        graph_destroy(bmz8->graph);
+        bmz8->graph = NULL;
+    }
+    if (bmz8->hashes != NULL)
+    {
+        for (i = 0; i < 2; ++i)
+        {
+            if (bmz8->hashes[i] != NULL) hash_state_destroy(bmz8->hashes[i]);
+        }
+        free(bmz8->hashes);
+        bmz8->hashes = NULL;
+    }
+    free(bmz8->g);
+    bmz8->g = NULL;
+    free(bmz8f);
+    free(mphf);
+    return NULL;
+}
+
+/**
+ * Label the critical vertices reachable from v, breadth first (8-bit labels).
+ *
+ * Each newly reached critical vertex u receives the smallest fresh label
+ * next_g (one above *biggest_g_value) such that next_g + g[lav] is not yet
+ * marked in used_edges for every already visited critical neighbour lav.
+ * The resulting sums are then marked in used_edges.
+ *
+ * \return 0 on success, 1 if some sum would reach bmz8->m, in which case
+ *         the caller has to restart the mapping step
+ */
+static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
+{
+    cmph_uint8 next_g; /* always assigned in the lookahead loop before it is read */
+    cmph_uint32 u; /* Auxiliary vertex */
+    cmph_uint32 lav; /* lookahead vertex */
+    cmph_uint8 collision;
+    vqueue_t * q = vqueue_new((cmph_uint32)(graph_ncritical_nodes(bmz8->graph)));
+    graph_iterator_t it, it1;
+
+    DEBUGP("Labelling critical vertices\n");
+    bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
+    SETBIT(visited, v);
+    vqueue_insert(q, v);
+    while(!vqueue_is_empty(q))
+    {
+        v = vqueue_remove(q);
+        it = graph_neighbors_it(bmz8->graph, v);
+        while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
+        {
+            if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
+            {
+                collision = 1;
+                while(collision) // lookahead to resolve collisions
+                {
+                    next_g = (cmph_uint8)(*biggest_g_value + 1);
+                    it1 = graph_neighbors_it(bmz8->graph, u);
+                    collision = 0;
+                    while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                    {
+                        if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited,lav))
+                        {
+                            if(next_g + bmz8->g[lav] >= bmz8->m)
+                            {
+                                vqueue_destroy(q);
+                                return 1; // restart mapping step.
+                            }
+                            if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
+                            {
+                                collision = 1;
+                                break;
+                            }
+                        }
+                    }
+                    // Recording the candidate makes the next attempt try next_g + 1.
+                    if (next_g > *biggest_g_value) *biggest_g_value = next_g;
+                }
+                // Marking used edges...
+                it1 = graph_neighbors_it(bmz8->graph, u);
+                while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                {
+                    if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav))
+                    {
+                        SETBIT(used_edges,(next_g + bmz8->g[lav]));
+
+                        if(next_g + bmz8->g[lav] > *biggest_edge_value)
+                            *biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
+                    }
+                }
+                bmz8->g[u] = next_g; // Labelling vertex u.
+                SETBIT(visited,u);
+                vqueue_insert(q, u);
+            }
+        }
+
+    }
+    vqueue_destroy(q);
+    return 0;
+}
+
+/**
+ * Variant of bmz8_traverse_critical_nodes that recycles rejected labels.
+ *
+ * A fresh label that collides for one vertex is kept in unused_g_values
+ * and offered to later vertices before any new label is generated.
+ * The list bookkeeping uses 32-bit counters: an 8-bit capacity cannot
+ * hold BUFSIZ, and an 8-bit sentinel could coincide with a real index.
+ *
+ * \return 0 on success, 1 if the mapping step has to be restarted (an edge
+ *         value would reach bmz8->m, or the label list could not be grown)
+ */
+static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
+{
+    /* Marks "label was freshly generated"; one less than this still exceeds any list index. */
+    const cmph_uint32 not_from_list = (cmph_uint32)-1;
+    cmph_uint8 next_g; /* always assigned in the lookahead loop before it is read */
+    cmph_uint32 u; /* Auxiliary vertex */
+    cmph_uint32 lav; /* lookahead vertex */
+    cmph_uint8 collision;
+    cmph_uint8 * unused_g_values = NULL;
+    cmph_uint32 unused_g_values_capacity = 0;
+    cmph_uint32 nunused_g_values = 0;
+    vqueue_t * q = vqueue_new((cmph_uint32)(graph_ncritical_nodes(bmz8->graph)));
+    graph_iterator_t it, it1;
+
+    DEBUGP("Labelling critical vertices\n");
+    bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
+    SETBIT(visited, v);
+    vqueue_insert(q, v);
+    while(!vqueue_is_empty(q))
+    {
+        v = vqueue_remove(q);
+        it = graph_neighbors_it(bmz8->graph, v);
+        while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
+        {
+            if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
+            {
+                cmph_uint32 next_g_index = 0;
+                collision = 1;
+                while(collision) // lookahead to resolve collisions
+                {
+                    if (next_g_index < nunused_g_values)
+                    {
+                        next_g = unused_g_values[next_g_index++];
+                    }
+                    else
+                    {
+                        next_g = (cmph_uint8)(*biggest_g_value + 1);
+                        next_g_index = not_from_list;
+                    }
+                    it1 = graph_neighbors_it(bmz8->graph, u);
+                    collision = 0;
+                    while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                    {
+                        if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited,lav))
+                        {
+                            if(next_g + bmz8->g[lav] >= bmz8->m)
+                            {
+                                vqueue_destroy(q);
+                                free(unused_g_values);
+                                return 1; // restart mapping step.
+                            }
+                            if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
+                            {
+                                collision = 1;
+                                break;
+                            }
+                        }
+                    }
+                    if(collision && (next_g > *biggest_g_value)) // saving the current g value stored in next_g.
+                    {
+                        if(nunused_g_values == unused_g_values_capacity)
+                        {
+                            // Keep the old pointer until realloc succeeds so it can still be freed.
+                            cmph_uint8 * grown = (cmph_uint8*)realloc(unused_g_values, ((size_t)unused_g_values_capacity + (size_t)BUFSIZ)*sizeof(cmph_uint8));
+                            if (grown == NULL)
+                            {
+                                vqueue_destroy(q);
+                                free(unused_g_values);
+                                return 1; // out of memory: give up on this mapping.
+                            }
+                            unused_g_values = grown;
+                            unused_g_values_capacity += (cmph_uint32)BUFSIZ;
+                        }
+                        unused_g_values[nunused_g_values++] = next_g;
+
+                    }
+                    if (next_g > *biggest_g_value) *biggest_g_value = next_g;
+                }
+
+                // If the accepted label came from the list, drop it by moving the last entry into its slot.
+                next_g_index--;
+                if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
+
+                // Marking used edges...
+                it1 = graph_neighbors_it(bmz8->graph, u);
+                while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
+                {
+                    if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav))
+                    {
+                        SETBIT(used_edges,(next_g + bmz8->g[lav]));
+                        if(next_g + bmz8->g[lav] > *biggest_edge_value)
+                            *biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
+                    }
+                }
+
+                bmz8->g[u] = next_g; // Labelling vertex u.
+                SETBIT(visited, u);
+                vqueue_insert(q, u);
+
+            }
+        }
+
+    }
+    vqueue_destroy(q);
+    free(unused_g_values);
+    return 0;
+}
+
+/* Scans used_edges upward from unused_edge_index and returns the first index
+ * whose bit is clear. Every probed index is asserted to be below bmz8->m. */
+static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
+{
+    for (;;)
+    {
+        assert(unused_edge_index < bmz8->m);
+        if (!GETBIT(used_edges, unused_edge_index)) break; // free edge id found
+        ++unused_edge_index;
+    }
+    return (cmph_uint8)unused_edge_index;
+}
+
+/* Depth-first labelling starting at vertex v: every not-yet-visited neighbour
+ * receives a g value such that g[v] + g[neighbour] (mod 256) equals the next
+ * unused edge id, and is then explored recursively. */
+static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 v, cmph_uint8 * unused_edge_index, cmph_uint8 * visited)
+{
+    graph_iterator_t cursor = graph_neighbors_it(bmz8->graph, v);
+    cmph_uint32 w;
+
+    for (w = graph_next_neighbor(bmz8->graph, &cursor);
+         w != GRAPH_NO_NEIGHBOR;
+         w = graph_next_neighbor(bmz8->graph, &cursor))
+    {
+        if (!GETBIT(visited, w))
+        {
+            // Claim the next free edge id and derive the label of w from it.
+            *unused_edge_index = next_unused_edge(bmz8, used_edges, *unused_edge_index);
+            bmz8->g[w] = (cmph_uint8)(*unused_edge_index - bmz8->g[v]);
+            SETBIT(visited, w);
+            ++(*unused_edge_index);
+            bmz8_traverse(bmz8, used_edges, w, unused_edge_index, visited);
+        }
+    }
+}
+
+/* Labels the vertices that are still unvisited.
+ * Pass 1: for every edge with exactly one visited endpoint, traverse from the visited endpoint.
+ * Pass 2: every vertex still unvisited gets g = 0 and becomes the root of a new traversal. */
+static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited)
+{
+    cmph_uint8 next_edge = 0;
+    cmph_uint8 idx;
+
+    DEBUGP("Labelling non critical vertices\n");
+    for (idx = 0; idx < bmz8->m; idx++)
+    {
+        cmph_uint8 a = (cmph_uint8)graph_vertex_id(bmz8->graph, idx, 0);
+        cmph_uint8 b = (cmph_uint8)graph_vertex_id(bmz8->graph, idx, 1);
+        int a_seen = GETBIT(visited, a) ? 1 : 0;
+        int b_seen = GETBIT(visited, b) ? 1 : 0;
+
+        // Both visited or both unvisited: nothing to propagate along this edge.
+        if (a_seen == b_seen) continue;
+        bmz8_traverse(bmz8, used_edges, a_seen ? a : b, &next_edge, visited);
+    }
+
+    for (idx = 0; idx < bmz8->n; idx++)
+    {
+        if (GETBIT(visited, idx)) continue;
+        bmz8->g[idx] = 0;
+        SETBIT(visited, idx);
+        bmz8_traverse(bmz8, used_edges, idx, &next_edge, visited);
+    }
+}
+
+/* Rebuilds the graph from the key source: each key contributes the edge (h1, h2),
+ * with both hashes taken modulo bmz8->n. Returns 0 as soon as a self loop or a
+ * duplicate edge is produced, 1 when every key was added. */
+static int bmz8_gen_edges(cmph_config_t *mph)
+{
+    bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
+    cmph_uint8 multiple_edges = 0;
+    cmph_uint8 e;
+
+    DEBUGP("Generating edges for %u vertices\n", bmz8->n);
+    graph_clear_edges(bmz8->graph);
+    mph->key_source->rewind(mph->key_source->data);
+
+    for (e = 0; e < mph->key_source->nkeys; ++e)
+    {
+        char *key = NULL;
+        cmph_uint32 keylen;
+        cmph_uint8 h1, h2;
+
+        mph->key_source->read(mph->key_source->data, &key, &keylen);
+        h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
+        h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);
+
+        // Nudge h2 to the next vertex (wrapping at n) when both hashes coincide.
+        if (h1 == h2)
+        {
+            ++h2;
+            if (h2 >= bmz8->n) h2 = 0;
+        }
+        if (h1 == h2)
+        {
+            if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
+            mph->key_source->dispose(mph->key_source->data, key, keylen);
+            return 0;
+        }
+
+        mph->key_source->dispose(mph->key_source->data, key, keylen);
+
+        // A repeated edge makes the graph non simple, which is rejected.
+        multiple_edges = graph_contains_edge(bmz8->graph, h1, h2);
+        if (multiple_edges)
+        {
+            if (mph->verbosity) fprintf(stderr, "A non simple graph was generated\n");
+            return 0;
+        }
+        graph_add_edge(bmz8->graph, h1, h2);
+    }
+    return !multiple_edges;
+}
+
+/* Serializes a bmz8 function to fd.
+ * Layout after the common header written by __cmph_dump: the number of hash
+ * functions (always 2), then for each hash its state length and state bytes,
+ * then n, m and the n bytes of g.
+ * Returns 1 when every write succeeded, 0 if any fwrite reported a short write. */
+int bmz8_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    cmph_uint8 two = 2; //number of hash functions
+    cmph_uint8 i;
+    int ok = 1;
+    bmz8_data_t *data = (bmz8_data_t *)mphf->data;
+
+    __cmph_dump(mphf, fd);
+
+    if (fwrite(&two, sizeof(cmph_uint8), (size_t)1, fd) != 1) ok = 0;
+
+    for (i = 0; i < two; ++i)
+    {
+        hash_state_dump(data->hashes[i], &buf, &buflen);
+        DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+        if (fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) != 1) ok = 0;
+        // fwrite returns 0 for a zero-sized item, so only treat non-empty writes as failures.
+        if (buflen > 0 && fwrite(buf, (size_t)buflen, (size_t)1, fd) != 1) ok = 0;
+        free(buf);
+        buf = NULL;
+    }
+
+    if (fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd) != 1) ok = 0;
+    if (fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd) != 1) ok = 0;
+
+    if (data->n > 0 && fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd) != 1) ok = 0;
+    return ok;
+}
+
+/* Reads a bmz8 function from f, in the order bmz8_dump writes it (hash count,
+ * hash states, n, m, g), and attaches the result to mphf->data.
+ * Read and allocation results are not checked. */
+void bmz8_load(FILE *f, cmph_t *mphf)
+{
+    cmph_uint8 hash_count;
+    cmph_uint8 idx;
+    cmph_uint32 state_len;
+    char *state_buf = NULL;
+    register size_t nbytes;
+    bmz8_data_t *bmz8 = (bmz8_data_t *)malloc(sizeof(bmz8_data_t));
+
+    DEBUGP("Loading bmz8 mphf\n");
+    mphf->data = bmz8;
+
+    // The hash table gets one extra slot that is kept NULL as a terminator.
+    nbytes = fread(&hash_count, sizeof(cmph_uint8), (size_t)1, f);
+    bmz8->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(size_t)(hash_count + 1));
+    bmz8->hashes[hash_count] = NULL;
+    DEBUGP("Reading %u hashes\n", hash_count);
+
+    idx = 0;
+    while (idx < hash_count)
+    {
+        nbytes = fread(&state_len, sizeof(cmph_uint32), (size_t)1, f);
+        DEBUGP("Hash state has %u bytes\n", state_len);
+        state_buf = (char *)malloc((size_t)state_len);
+        nbytes = fread(state_buf, (size_t)state_len, (size_t)1, f);
+        bmz8->hashes[idx] = hash_state_load(state_buf, state_len);
+        free(state_buf);
+        ++idx;
+    }
+
+    DEBUGP("Reading m and n\n");
+    nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
+    nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);
+
+    bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n);
+    nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f);
+    #ifdef DEBUG
+    fprintf(stderr, "G: ");
+    for (idx = 0; idx < bmz8->n; ++idx) fprintf(stderr, "%u ", bmz8->g[idx]);
+    fprintf(stderr, "\n");
+    #endif
+}
+
+
+/* Evaluates the bmz8 function for key: both hashes are reduced modulo n and the
+ * result is g[h1] + g[h2] truncated to 8 bits.
+ * When the two hashes coincide, h2 moves to the next vertex and wraps to 0 on
+ * reaching n. This must mirror the rule used when the edges were generated
+ * (wrap on ">= n"); the former "> n" test let h2 equal n and index g out of bounds. */
+cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    bmz8_data_t *bmz8 = mphf->data;
+    cmph_uint8 h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
+    cmph_uint8 h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);
+    DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
+    if (h1 == h2 && ++h2 >= bmz8->n) h2 = 0;
+    DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz8->g[h1], bmz8->g[h2], bmz8->m);
+    return (cmph_uint8)(bmz8->g[h1] + bmz8->g[h2]);
+}
+/* Releases everything owned by a bmz8 function: the g array, the two hash
+ * states, the hash pointer table, the data record and finally mphf itself. */
+void bmz8_destroy(cmph_t *mphf)
+{
+    bmz8_data_t *owned = (bmz8_data_t *)mphf->data;
+    cmph_uint32 k;
+
+    free(owned->g);
+    for (k = 0; k < 2; ++k)
+    {
+        hash_state_destroy(owned->hashes[k]);
+    }
+    free(owned->hashes);
+    free(owned);
+    free(mphf);
+}
+
+/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Packs a bmz8 function into a caller-provided contiguous memory area.
+ *
+ * Layout written: for each of the two hashes, its type as a cmph_uint32 followed
+ * by its packed state; then n as one byte; then the n bytes of g.
+ * \param mphf pointer to the mphf to pack
+ * \param packed_mphf destination area; must be at least cmph_packed_size() bytes
+ */
+void bmz8_pack(cmph_t *mphf, void *packed_mphf)
+{
+    bmz8_data_t *data = (bmz8_data_t *)mphf->data;
+    cmph_uint8 *out = packed_mphf;
+    cmph_uint32 k;
+
+    // hash type + packed hash state, first for h1 and then for h2
+    for (k = 0; k < 2; ++k)
+    {
+        CMPH_HASH type = hash_get_type(data->hashes[k]);
+        *((cmph_uint32 *) out) = type;
+        out += sizeof(cmph_uint32);
+
+        hash_state_pack(data->hashes[k], out);
+        out += hash_state_packed_size(type);
+    }
+
+    // vertex count
+    *out = data->n;
+    out += 1;
+
+    // g table
+    memcpy(out, data->g, sizeof(cmph_uint8)*data->n);
+}
+
+/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
+ * \brief Computes how many bytes are needed to pack mphf.
+ *
+ * The total is sizeof(CMPH_ALGO), two cmph_uint32 hash type tags, both packed
+ * hash states, one byte for n and n bytes for g.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function
+ */
+cmph_uint32 bmz8_packed_size(cmph_t *mphf)
+{
+    bmz8_data_t *data = (bmz8_data_t *)mphf->data;
+    CMPH_HASH type1 = hash_get_type(data->hashes[0]);
+    CMPH_HASH type2 = hash_get_type(data->hashes[1]);
+    size_t total = sizeof(CMPH_ALGO);
+
+    total += hash_state_packed_size(type1);
+    total += hash_state_packed_size(type2);
+    total += 2*sizeof(cmph_uint32);
+    total += sizeof(cmph_uint8);
+    total += sizeof(cmph_uint8)*data->n;
+    return (cmph_uint32)total;
+}
+
+/** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ *
+ * Reads the area in the order bmz8_pack writes it: h1 type, packed h1, h2 type,
+ * packed h2, n, g. When both hashes coincide, h2 moves to the next vertex and
+ * wraps to 0 on reaching n (">= n", matching edge generation); the former "> n"
+ * test let h2 equal n and read one byte past the g table.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    register cmph_uint8 *h1_ptr = packed_mphf;
+    register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
+    h1_ptr += 4;
+
+    register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
+    register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
+    h2_ptr += 4;
+
+    register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);
+
+    register cmph_uint8 n = *g_ptr++;
+
+    register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
+    register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
+    DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
+    if (h1 == h2 && ++h2 >= n) h2 = 0;
+    return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
+}
diff --git a/girepository/cmph/bmz8.h b/girepository/cmph/bmz8.h
new file mode 100644
index 00000000..5456759e
--- /dev/null
+++ b/girepository/cmph/bmz8.h
@@ -0,0 +1,42 @@
+#ifndef __CMPH_BMZ8_H__
+#define __CMPH_BMZ8_H__
+
+#include "cmph.h"
+
+typedef struct __bmz8_data_t bmz8_data_t;
+typedef struct __bmz8_config_data_t bmz8_config_data_t;
+
+bmz8_config_data_t *bmz8_config_new();
+void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void bmz8_config_destroy(cmph_config_t *mph);
+cmph_t *bmz8_new(cmph_config_t *mph, double c);
+
+void bmz8_load(FILE *f, cmph_t *mphf);
+int bmz8_dump(cmph_t *mphf, FILE *f);
+void bmz8_destroy(cmph_t *mphf);
+cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void bmz8_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 bmz8_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/bmz8_structs.h b/girepository/cmph/bmz8_structs.h
new file mode 100644
index 00000000..408b5299
--- /dev/null
+++ b/girepository/cmph/bmz8_structs.h
@@ -0,0 +1,25 @@
+#ifndef __CMPH_BMZ8_STRUCTS_H__
+#define __CMPH_BMZ8_STRUCTS_H__
+
+#include "hash_state.h"
+
+/* Runtime representation of a bmz8 function, as dumped, loaded and searched. */
+struct __bmz8_data_t
+{
+ cmph_uint8 m; //edges (words) count
+ cmph_uint8 n; //vertex count; hash values are reduced modulo n at lookup
+ cmph_uint8 *g; // n bytes; a lookup returns g[h1] + g[h2] truncated to 8 bits
+ hash_state_t **hashes; // hash states; lookups use [0] and [1], the loader adds a trailing NULL slot
+};
+
+
+/* Working state used while a bmz8 function is being constructed. */
+struct __bmz8_config_data_t
+{
+ CMPH_HASH hashfuncs[2]; // presumably the hash types selected for h1 and h2 -- confirm against the config setter
+ cmph_uint8 m; //edges (words) count
+ cmph_uint8 n; //vertex count
+ graph_t *graph; // graph whose edges are (h1, h2) pairs, one per key
+ cmph_uint8 *g; // per-vertex labels assigned during the traversal steps
+ hash_state_t **hashes; // hash states; [0] and [1] are used to derive the edge endpoints
+};
+
+#endif
diff --git a/girepository/cmph/bmz_structs.h b/girepository/cmph/bmz_structs.h
new file mode 100644
index 00000000..67065a00
--- /dev/null
+++ b/girepository/cmph/bmz_structs.h
@@ -0,0 +1,25 @@
+#ifndef __CMPH_BMZ_STRUCTS_H__
+#define __CMPH_BMZ_STRUCTS_H__
+
+#include "hash_state.h"
+
+/* 32-bit counterpart of the bmz8 data record (same fields, cmph_uint32 wide). */
+struct __bmz_data_t
+{
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ cmph_uint32 *g; // presumably one label per vertex, as in the 8-bit variant -- confirm against bmz.c
+ hash_state_t **hashes; // hash states used by the function
+};
+
+
+/* 32-bit counterpart of the bmz8 construction state (same fields, cmph_uint32 wide). */
+struct __bmz_config_data_t
+{
+ CMPH_HASH hashfuncs[2]; // presumably the hash types selected for h1 and h2 -- confirm against bmz.c
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ graph_t *graph; // graph built from the keys during construction
+ cmph_uint32 *g; // presumably one label per vertex -- confirm against bmz.c
+ hash_state_t **hashes; // hash states used during construction
+};
+
+#endif
diff --git a/girepository/cmph/brz.c b/girepository/cmph/brz.c
new file mode 100755
index 00000000..eb89ac06
--- /dev/null
+++ b/girepository/cmph/brz.c
@@ -0,0 +1,985 @@
+#include "graph.h"
+#include "fch.h"
+#include "fch_structs.h"
+#include "bmz8.h"
+#include "bmz8_structs.h"
+#include "brz.h"
+#include "cmph_structs.h"
+#include "brz_structs.h"
+#include "buffer_manager.h"
+#include "cmph.h"
+#include "hash.h"
+#include "bitbool.h"
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#define MAX_BUCKET_SIZE 255
+//#define DEBUG
+#include "debug.h"
+
+static int brz_gen_mphf(cmph_config_t *mph);
+static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
+static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint32 nkeys);
+static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fchf, cmph_uint32 index, cmph_uint32 *buflen);
+static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
+/* Allocates a brz configuration with its defaults: FCH as the per-bucket
+ * algorithm, b = 128, Jenkins for all three hashes, 1 MiB of working memory and
+ * "/var/tmp/" as the temporary directory.
+ * Returns NULL if either allocation fails. The allocation is now checked before
+ * the record is written to (the old assert ran only after it had been used). */
+brz_config_data_t *brz_config_new()
+{
+    static const char default_tmp_dir[] = "/var/tmp/";
+    brz_config_data_t *brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
+
+    if (brz == NULL) return NULL;
+
+    brz->algo = CMPH_FCH;
+    brz->b = 128;
+    brz->hashfuncs[0] = CMPH_HASH_JENKINS;
+    brz->hashfuncs[1] = CMPH_HASH_JENKINS;
+    brz->hashfuncs[2] = CMPH_HASH_JENKINS;
+    brz->size = NULL;
+    brz->offset = NULL;
+    brz->g = NULL;
+    brz->h1 = NULL;
+    brz->h2 = NULL;
+    brz->h0 = NULL;
+    brz->memory_availability = 1024*1024;
+    brz->mphf_fd = NULL;
+
+    // sizeof includes the terminating NUL, so the buffer is exactly large enough.
+    brz->tmp_dir = (cmph_uint8 *)calloc(sizeof(default_tmp_dir), sizeof(cmph_uint8));
+    if (brz->tmp_dir == NULL)
+    {
+        free(brz);
+        return NULL;
+    }
+    memcpy(brz->tmp_dir, default_tmp_dir, sizeof(default_tmp_dir));
+    return brz;
+}
+
+/* Frees the brz-specific configuration attached to mph: the temporary
+ * directory string first, then the configuration record itself. */
+void brz_config_destroy(cmph_config_t *mph)
+{
+    brz_config_data_t *cfg = (brz_config_data_t *)mph->data;
+
+    free(cfg->tmp_dir);
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(cfg);
+}
+
+/* Copies hash function types from hashfuncs into the configuration, stopping at
+ * the CMPH_HASH_COUNT terminator or after three entries, whichever comes first. */
+void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    brz_config_data_t *brz = (brz_config_data_t *)mph->data;
+    cmph_uint32 slot;
+
+    //brz only uses three hash functions
+    for (slot = 0; hashfuncs[slot] != CMPH_HASH_COUNT && slot < 3; ++slot)
+    {
+        brz->hashfuncs[slot] = hashfuncs[slot];
+    }
+}
+
+/* Sets the working memory budget, given in megabytes; 0 leaves the current
+ * setting untouched. The value is clamped to the largest megabyte count whose
+ * byte size still fits in 32 bits. The unclamped product wrapped around (4096
+ * produced 0 bytes), yielding a buffer far smaller than requested. */
+void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
+{
+    brz_config_data_t *brz = (brz_config_data_t *)mph->data;
+    const cmph_uint32 bytes_per_mb = 1024U*1024U;
+    const cmph_uint32 max_mb = ((cmph_uint32)-1) / bytes_per_mb;
+
+    if (memory_availability == 0) return;
+    if (memory_availability > max_mb) memory_availability = max_mb;
+    brz->memory_availability = memory_availability*bytes_per_mb;
+}
+
+/* Replaces the temporary directory with a private copy of tmp_dir, appending a
+ * trailing '/' when it is missing.
+ * NULL and the empty string are ignored (the empty string used to index
+ * tmp_dir[len-1] with len == 0). The new buffer is built before the old one is
+ * released, so an allocation failure or tmp_dir aliasing the current setting
+ * leaves the configuration valid. */
+void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
+{
+    brz_config_data_t *brz = (brz_config_data_t *)mph->data;
+    cmph_uint8 *copy = NULL;
+    size_t len;
+    int needs_slash;
+
+    if (tmp_dir == NULL) return;
+    len = strlen((char *)tmp_dir);
+    if (len == 0) return;
+
+    needs_slash = (tmp_dir[len-1] != '/');
+    // calloc zero-fills, which supplies the terminating NUL.
+    copy = (cmph_uint8 *)calloc(len + (needs_slash ? 2 : 1), sizeof(cmph_uint8));
+    if (copy == NULL) return;
+
+    memcpy(copy, tmp_dir, len);
+    if (needs_slash) copy[len] = '/';
+
+    free(brz->tmp_dir);
+    brz->tmp_dir = copy;
+}
+
+/* Stores the stream the generated function is written to during construction;
+ * the stored handle is asserted to be non-NULL. */
+void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
+{
+    brz_config_data_t *cfg = (brz_config_data_t *)mph->data;
+
+    cfg->mphf_fd = mphf_fd;
+    assert(cfg->mphf_fd);
+}
+
+/* Sets the parameter b. Only values strictly between 64 and 175 are accepted;
+ * anything else falls back to 128. */
+void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
+{
+    brz_config_data_t *brz = (brz_config_data_t *)mph->data;
+    cmph_uint32 chosen = (b > 64 && b < 175) ? b : 128;
+
+    brz->b = (cmph_uint8)chosen;
+}
+
+/* Selects the algorithm used for the per-bucket functions. Only CMPH_BMZ8 and
+ * CMPH_FCH are supported; any other value is silently ignored. */
+void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
+{
+    brz_config_data_t *brz;
+
+    if (algo != CMPH_BMZ8 && algo != CMPH_FCH) return; // unsupported algorithm
+
+    brz = (brz_config_data_t *)mph->data;
+    brz->algo = algo;
+}
+
+/* Builds a brz minimal perfect hash function for the keys of mph.
+ *
+ * c is first forced into the range the bucket algorithm accepts (BMZ8: values
+ * of 0 or >= 2.0 become 1; FCH: values <= 2.0 become 2.6). The keys are split
+ * into k = ceil(m / b) buckets by a hash h0; up to 20 different h0 seeds are
+ * tried until brz_gen_mphf succeeds.
+ *
+ * The per-bucket counters in brz->size are cleared before every attempt,
+ * because brz_gen_mphf increments them and a failed attempt would otherwise
+ * leave stale counts behind for the next one.
+ *
+ * Returns the new function, or NULL when every attempt failed. */
+cmph_t *brz_new(cmph_config_t *mph, double c)
+{
+    cmph_t *mphf = NULL;
+    brz_data_t *brzf = NULL;
+    cmph_uint32 i;
+    cmph_uint32 iterations = 20;
+
+    DEBUGP("c: %f\n", c);
+    brz_config_data_t *brz = (brz_config_data_t *)mph->data;
+    switch(brz->algo) // validating restrictions over parameter c.
+    {
+        case CMPH_BMZ8:
+            if (c == 0 || c >= 2.0) c = 1;
+            break;
+        case CMPH_FCH:
+            if (c <= 2.0) c = 2.6;
+            break;
+        default:
+            assert(0);
+    }
+    brz->c = c;
+    brz->m = mph->key_source->nkeys;
+    DEBUGP("m: %u\n", brz->m);
+    brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
+    DEBUGP("k: %u\n", brz->k);
+    brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));
+
+    // Clustering the keys by graph id.
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Partioning the set of keys.\n");
+    }
+
+    while(1)
+    {
+        int ok;
+        // Start every attempt from empty buckets.
+        if (brz->size != NULL && brz->k > 0)
+        {
+            memset(brz->size, 0, (size_t)brz->k * sizeof(cmph_uint8));
+        }
+        DEBUGP("hash function 3\n");
+        brz->h0 = hash_state_new(brz->hashfuncs[2], brz->k);
+        DEBUGP("Generating graphs\n");
+        ok = brz_gen_mphf(mph);
+        if (!ok)
+        {
+            --iterations;
+            hash_state_destroy(brz->h0);
+            brz->h0 = NULL;
+            DEBUGP("%u iterations remaining to create the graphs in a external file\n", iterations);
+            if (mph->verbosity)
+            {
+                fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
+            }
+            if (iterations == 0) break;
+        }
+        else break;
+    }
+    if (iterations == 0)
+    {
+        DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
+        free(brz->size);
+        brz->size = NULL; // do not leave a dangling pointer in the configuration
+        return NULL;
+    }
+    DEBUGP("Graphs generated\n");
+
+    // offset[i] is the number of keys held by buckets 0..i-1.
+    brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
+    for (i = 1; i < brz->k; ++i)
+    {
+        brz->offset[i] = brz->size[i-1] + brz->offset[i-1];
+    }
+    // Generating a mphf
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    mphf->algo = mph->algo;
+    brzf = (brz_data_t *)malloc(sizeof(brz_data_t));
+    brzf->g = brz->g;
+    brz->g = NULL; //transfer memory ownership
+    brzf->h1 = brz->h1;
+    brz->h1 = NULL; //transfer memory ownership
+    brzf->h2 = brz->h2;
+    brz->h2 = NULL; //transfer memory ownership
+    brzf->h0 = brz->h0;
+    brz->h0 = NULL; //transfer memory ownership
+    brzf->size = brz->size;
+    brz->size = NULL; //transfer memory ownership
+    brzf->offset = brz->offset;
+    brz->offset = NULL; //transfer memory ownership
+    brzf->k = brz->k;
+    brzf->c = brz->c;
+    brzf->m = brz->m;
+    brzf->algo = brz->algo;
+    mphf->data = brzf;
+    mphf->size = brz->m;
+    DEBUGP("Successfully generated minimal perfect hash\n");
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+    }
+    return mphf;
+}
+
+/* Partitions the keys into brz->k buckets with h0 and builds one function per bucket.
+ *
+ * Phase 1 (partitioning): keys are appended to an in-memory buffer as
+ * [keylen][key bytes] records. Whenever the next key would not fit in
+ * brz->memory_availability, the buffered keys are ordered by bucket id and
+ * written to a temporary file "<tmp_dir><nflushes>.cmph". brz->size[] counts the
+ * keys of each bucket.
+ * Phase 2 (merge + generation): the common header is written to brz->mphf_fd,
+ * the temporary files are merged by ascending bucket id, and once all keys of a
+ * bucket are collected a function is generated for it with brz->algo and
+ * appended to brz->mphf_fd.
+ *
+ * Returns 1 on success and 0 when a bucket grows beyond the allowed size, more
+ * than 1024 temporary files were produced, or a bucket function could not be
+ * generated. */
+static int brz_gen_mphf(cmph_config_t *mph)
+{
+ cmph_uint32 i, e, error;
+ brz_config_data_t *brz = (brz_config_data_t *)mph->data;
+ cmph_uint32 memory_usage = 0;
+ cmph_uint32 nkeys_in_buffer = 0;
+ cmph_uint8 *buffer = (cmph_uint8 *)malloc((size_t)brz->memory_availability);
+ cmph_uint32 *buckets_size = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
+ cmph_uint32 *keys_index = NULL;
+ cmph_uint8 **buffer_merge = NULL;
+ cmph_uint32 *buffer_h0 = NULL;
+ cmph_uint32 nflushes = 0;
+ cmph_uint32 h0;
+ register size_t nbytes;
+ FILE * tmp_fd = NULL;
+ buffer_manager_t * buff_manager = NULL;
+ char *filename = NULL;
+ char *key = NULL;
+ cmph_uint32 keylen;
+ cmph_uint32 cur_bucket = 0;
+ cmph_uint8 nkeys_vd = 0;
+ cmph_uint8 ** keys_vd = NULL;
+
+ mph->key_source->rewind(mph->key_source->data);
+ DEBUGP("Generating graphs from %u keys\n", brz->m);
+ // Partitioning
+ for (e = 0; e < brz->m; ++e)
+ {
+ mph->key_source->read(mph->key_source->data, &key, &keylen);
+
+ /* Buffers management */
+ if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
+ {
+ if(mph->verbosity)
+ {
+ fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
+ }
+ // Turn the per-bucket counts into start positions (exclusive prefix sums);
+ // empty buckets are skipped and keep their 0.
+ cmph_uint32 value = buckets_size[0];
+ cmph_uint32 sum = 0;
+ cmph_uint32 keylen1 = 0;
+ buckets_size[0] = 0;
+ for(i = 1; i < brz->k; i++)
+ {
+ if(buckets_size[i] == 0) continue;
+ sum += value;
+ value = buckets_size[i];
+ buckets_size[i] = sum;
+
+ }
+ memory_usage = 0;
+ // keys_index[p] = buffer offset of the record that lands at sorted position p.
+ keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
+ for(i = 0; i < nkeys_in_buffer; i++)
+ {
+ memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
+ h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
+ keys_index[buckets_size[h0]] = memory_usage;
+ buckets_size[h0]++;
+ memory_usage += keylen1 + (cmph_uint32)sizeof(keylen1);
+ }
+ // NOTE(review): the name buffer has room for strlen(tmp_dir) + 10 characters,
+ // which covers "<n>.cmph" only while n has at most five digits -- confirm.
+ filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
+ sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
+ // NOTE(review): the fopen result is used without a NULL check.
+ tmp_fd = fopen(filename, "wb");
+ free(filename);
+ filename = NULL;
+ for(i = 0; i < nkeys_in_buffer; i++)
+ {
+ memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
+ nbytes = fwrite(buffer + keys_index[i], (size_t)1, keylen1 + sizeof(keylen1), tmp_fd);
+ }
+ nkeys_in_buffer = 0;
+ memory_usage = 0;
+ memset((void *)buckets_size, 0, brz->k*sizeof(cmph_uint32));
+ nflushes++;
+ free(keys_index);
+ fclose(tmp_fd);
+ }
+ // Append the current key to the buffer as [keylen][key bytes].
+ memcpy(buffer + memory_usage, &keylen, sizeof(keylen));
+ memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen);
+ memory_usage += keylen + (cmph_uint32)sizeof(keylen);
+ h0 = hash(brz->h0, key, keylen) % brz->k;
+
+ // Give up when the bucket is already full, or (BMZ8 only) when c * size no longer fits in 8 bits.
+ // NOTE(review): this early return does not dispose of the key that was just read.
+ if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
+ {
+ free(buffer);
+ free(buckets_size);
+ return 0;
+ }
+ brz->size[h0] = (cmph_uint8)(brz->size[h0] + 1U);
+ buckets_size[h0] ++;
+ nkeys_in_buffer++;
+ mph->key_source->dispose(mph->key_source->data, key, keylen);
+ }
+ // Final flush: same procedure as above for whatever is still buffered.
+ if (memory_usage != 0) // flush buffers
+ {
+ if(mph->verbosity)
+ {
+ fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
+ }
+ cmph_uint32 value = buckets_size[0];
+ cmph_uint32 sum = 0;
+ cmph_uint32 keylen1 = 0;
+ buckets_size[0] = 0;
+ for(i = 1; i < brz->k; i++)
+ {
+ if(buckets_size[i] == 0) continue;
+ sum += value;
+ value = buckets_size[i];
+ buckets_size[i] = sum;
+ }
+ memory_usage = 0;
+ keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
+ for(i = 0; i < nkeys_in_buffer; i++)
+ {
+ memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
+ h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
+ keys_index[buckets_size[h0]] = memory_usage;
+ buckets_size[h0]++;
+ memory_usage += keylen1 + (cmph_uint32)sizeof(keylen1);
+ }
+ filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
+ sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
+ tmp_fd = fopen(filename, "wb");
+ free(filename);
+ filename = NULL;
+ for(i = 0; i < nkeys_in_buffer; i++)
+ {
+ memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
+ nbytes = fwrite(buffer + keys_index[i], (size_t)1, keylen1 + sizeof(keylen1), tmp_fd);
+ }
+ nkeys_in_buffer = 0;
+ memory_usage = 0;
+ memset((void *)buckets_size, 0, brz->k*sizeof(cmph_uint32));
+ nflushes++;
+ free(keys_index);
+ fclose(tmp_fd);
+ }
+
+ free(buffer);
+ free(buckets_size);
+ if(nflushes > 1024) return 0; // Too many files generated.
+ // mphf generation
+ if(mph->verbosity)
+ {
+ fprintf(stderr, "\nMPHF generation \n");
+ }
+ /* Starting to dump to disk the resultant MPHF: __cmph_dump function */
+ // Header: algorithm name (NUL-terminated), m, c, bucket algorithm, k, then the k bucket sizes.
+ nbytes = fwrite(cmph_names[CMPH_BRZ], (size_t)(strlen(cmph_names[CMPH_BRZ]) + 1), (size_t)1, brz->mphf_fd);
+ nbytes = fwrite(&(brz->m), sizeof(brz->m), (size_t)1, brz->mphf_fd);
+ nbytes = fwrite(&(brz->c), sizeof(double), (size_t)1, brz->mphf_fd);
+ nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd);
+ nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs
+ nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd);
+
+ //tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
+ buff_manager = buffer_manager_new(brz->memory_availability, nflushes);
+ buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *));
+ buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32));
+
+ memory_usage = 0;
+ // Prime the merge: for every temporary file keep its first key (buffer_merge)
+ // and that key's bucket id (buffer_h0).
+ for(i = 0; i < nflushes; i++)
+ {
+ filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
+ sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
+ buffer_manager_open(buff_manager, i, filename);
+ free(filename);
+ filename = NULL;
+ key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
+ h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
+ buffer_h0[i] = h0;
+ buffer_merge[i] = (cmph_uint8 *)key;
+ key = NULL; //transfer memory ownership
+ }
+ e = 0;
+ keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
+ nkeys_vd = 0;
+ error = 0;
+ while(e < brz->m)
+ {
+ // Continue with the file whose pending key belongs to the smallest bucket id.
+ i = brz_min_index(buffer_h0, nflushes);
+ cur_bucket = buffer_h0[i];
+ key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
+ if(key)
+ {
+ // Collect further keys from file i for as long as they fall into the same bucket.
+ while(key)
+ {
+ //keylen = strlen(key);
+ h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
+ if (h0 != buffer_h0[i]) break;
+ keys_vd[nkeys_vd++] = (cmph_uint8 *)key;
+ key = NULL; //transfer memory ownership
+ e++;
+ key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
+ }
+ if (key)
+ {
+ // A key of another bucket was reached: hand over the pending key of file i
+ // and keep the new key as the pending one.
+ assert(nkeys_vd < brz->size[cur_bucket]);
+ keys_vd[nkeys_vd++] = buffer_merge[i];
+ buffer_merge[i] = NULL; //transfer memory ownership
+ e++;
+ buffer_h0[i] = h0;
+ buffer_merge[i] = (cmph_uint8 *)key;
+ }
+ }
+ if(!key)
+ {
+ // File i has no more keys: hand over its pending key and mark it exhausted with UINT_MAX.
+ assert(nkeys_vd < brz->size[cur_bucket]);
+ keys_vd[nkeys_vd++] = buffer_merge[i];
+ buffer_merge[i] = NULL; //transfer memory ownership
+ e++;
+ buffer_h0[i] = UINT_MAX;
+ }
+
+ if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
+ {
+ cmph_io_adapter_t *source = NULL;
+ cmph_config_t *config = NULL;
+ cmph_t *mphf_tmp = NULL;
+ char *bufmphf = NULL;
+ cmph_uint32 buflenmphf = 0;
+ // Source of keys
+ source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
+ config = cmph_config_new(source);
+ cmph_config_set_algo(config, brz->algo);
+ //cmph_config_set_algo(config, CMPH_BMZ8);
+ cmph_config_set_graphsize(config, brz->c);
+ mphf_tmp = cmph_new(config);
+ if (mphf_tmp == NULL)
+ {
+ if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k);
+ error = 1;
+ cmph_config_destroy(config);
+ brz_destroy_keys_vd(keys_vd, nkeys_vd);
+ cmph_io_byte_vector_adapter_destroy(source);
+ break;
+ }
+ if(mph->verbosity)
+ {
+ if (cur_bucket % 1000 == 0)
+ {
+ fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k);
+ }
+ }
+ // Serialize the bucket function (both hash states plus its g table) and append it to the output.
+ switch(brz->algo)
+ {
+ case CMPH_FCH:
+ {
+ fch_data_t * fchf = NULL;
+ fchf = (fch_data_t *)mphf_tmp->data;
+ bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf);
+ }
+ break;
+ case CMPH_BMZ8:
+ {
+ bmz8_data_t * bmzf = NULL;
+ bmzf = (bmz8_data_t *)mphf_tmp->data;
+ bufmphf = brz_copy_partial_bmz8_mphf(brz, bmzf, cur_bucket, &buflenmphf);
+ }
+ break;
+ default: assert(0);
+ }
+ nbytes = fwrite(bufmphf, (size_t)buflenmphf, (size_t)1, brz->mphf_fd);
+ free(bufmphf);
+ bufmphf = NULL;
+ cmph_config_destroy(config);
+ brz_destroy_keys_vd(keys_vd, nkeys_vd);
+ cmph_destroy(mphf_tmp);
+ cmph_io_byte_vector_adapter_destroy(source);
+ nkeys_vd = 0;
+ }
+ }
+ buffer_manager_destroy(buff_manager);
+ free(keys_vd);
+ free(buffer_merge);
+ free(buffer_h0);
+ if (error) return 0;
+ return 1;
+}
+
+/* Returns the index of the smallest of the first n entries of vector; on ties
+ * the lowest index wins. Returns 0 when n is 0 or 1. */
+static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
+{
+    cmph_uint32 best = 0;
+    cmph_uint32 pos = 1;
+
+    while (pos < n)
+    {
+        if (vector[pos] < vector[best])
+        {
+            best = pos;
+        }
+        ++pos;
+    }
+    return best;
+}
+
+/* Frees the first nkeys entries of keys_vd and resets each slot to NULL; the
+ * pointer array itself is left to the caller.
+ * The counter has the same width as nkeys: the previous 8-bit counter could
+ * never reach a bound above 255, so the loop would not have terminated. */
+static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint32 nkeys)
+{
+    cmph_uint32 i;
+    for (i = 0; i < nkeys; i++)
+    {
+        free(keys_vd[i]);
+        keys_vd[i] = NULL;
+    }
+}
+
+/* Serializes one FCH bucket function into a freshly allocated buffer:
+ * [len(h1)][h1 state][len(h2)][h2 state][n bytes of g], with n = fchf->b.
+ * For every g entry only the first byte in memory is copied.
+ * The total length is stored in *buflen; the caller frees the returned buffer.
+ * brz and index are not used. */
+static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fchf, cmph_uint32 index, cmph_uint32 *buflen)
+{
+    cmph_uint32 len1 = 0;
+    cmph_uint32 len2 = 0;
+    char *state1 = NULL;
+    char *state2 = NULL;
+    char *out = NULL;
+    char *cursor = NULL;
+    cmph_uint32 pos = 0;
+    cmph_uint32 n = fchf->b;//brz->size[index];
+
+    hash_state_dump(fchf->h1, &state1, &len1);
+    hash_state_dump(fchf->h2, &state2, &len2);
+
+    *buflen = len1 + len2 + n + 2U * (cmph_uint32)sizeof(cmph_uint32);
+    out = (char *)malloc((size_t)(*buflen));
+    cursor = out;
+
+    memcpy(cursor, &len1, sizeof(cmph_uint32));
+    cursor += sizeof(cmph_uint32);
+    memcpy(cursor, state1, (size_t)len1);
+    cursor += len1;
+    memcpy(cursor, &len2, sizeof(cmph_uint32));
+    cursor += sizeof(cmph_uint32);
+    memcpy(cursor, state2, (size_t)len2);
+    cursor += len2;
+
+    // one byte per g entry
+    while (pos < n)
+    {
+        memcpy(cursor + pos, (fchf->g + pos), (size_t)1);
+        pos++;
+    }
+
+    free(state1);
+    free(state2);
+    return out;
+}
+/* Serializes one BMZ8 bucket function into a freshly allocated buffer:
+ * [len(h1)][h1 state][len(h2)][h2 state][n bytes of g], where
+ * n = ceil(brz->c * brz->size[index]).
+ * The total length is stored in *buflen; the caller frees the returned buffer. */
+static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
+{
+    cmph_uint32 len1 = 0;
+    cmph_uint32 len2 = 0;
+    char *state1 = NULL;
+    char *state2 = NULL;
+    char *out = NULL;
+    char *cursor = NULL;
+    cmph_uint32 n = (cmph_uint32)ceil(brz->c * brz->size[index]);
+
+    hash_state_dump(bmzf->hashes[0], &state1, &len1);
+    hash_state_dump(bmzf->hashes[1], &state2, &len2);
+
+    *buflen = len1 + len2 + n + 2U * (cmph_uint32)sizeof(cmph_uint32);
+    out = (char *)malloc((size_t)(*buflen));
+    cursor = out;
+
+    memcpy(cursor, &len1, sizeof(cmph_uint32));
+    cursor += sizeof(cmph_uint32);
+    memcpy(cursor, state1, (size_t)len1);
+    cursor += len1;
+    memcpy(cursor, &len2, sizeof(cmph_uint32));
+    cursor += sizeof(cmph_uint32);
+    memcpy(cursor, state2, (size_t)len2);
+    cursor += len2;
+    memcpy(cursor, bmzf->g, (size_t)n);
+
+    free(state1);
+    free(state2);
+    return out;
+}
+
+
+/* Writes the trailing part of a brz function to fd: the state of h0 (length,
+ * then bytes), m, and the k-entry offset vector. The leading part of the
+ * function has already been written to disk during construction.
+ * Returns 1 when every write succeeded, 0 if any fwrite reported a short write. */
+int brz_dump(cmph_t *mphf, FILE *fd)
+{
+    brz_data_t *data = (brz_data_t *)mphf->data;
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    int ok = 1;
+
+    DEBUGP("Dumping brzf\n");
+    // The initial part of the MPHF have already been dumped to disk during construction
+    // Dumping h0
+    hash_state_dump(data->h0, &buf, &buflen);
+    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+    if (fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) != 1) ok = 0;
+    // fwrite returns 0 for a zero-sized item, so only non-empty writes count as failures.
+    if (buflen > 0 && fwrite(buf, (size_t)buflen, (size_t)1, fd) != 1) ok = 0;
+    free(buf);
+    // Dumping m and the vector offset.
+    if (fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd) != 1) ok = 0;
+    if (data->k > 0 && fwrite(data->offset, sizeof(cmph_uint32)*(data->k), (size_t)1, fd) != 1) ok = 0;
+    return ok;
+}
+
+/* Reads a brz function from f and attaches it to mphf->data.
+ * Order read: c, bucket algorithm, k, the k bucket sizes, then for every bucket
+ * its h1 state, h2 state and g table, and finally the h0 state, m and the
+ * k-entry offset vector.
+ * The g table length of a bucket is fch_calc_b(c, size) for FCH and
+ * ceil(c * size) for BMZ8. For any other algorithm the length is 0, so an
+ * unknown value can no longer lead to an uninitialised length being used when
+ * asserts are compiled out.
+ * Read and allocation results are not checked. */
+void brz_load(FILE *f, cmph_t *mphf)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    register size_t nbytes;
+    cmph_uint32 i, n = 0;
+    brz_data_t *brz = (brz_data_t *)malloc(sizeof(brz_data_t));
+
+    DEBUGP("Loading brz mphf\n");
+    mphf->data = brz;
+    nbytes = fread(&(brz->c), sizeof(double), (size_t)1, f);
+    nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo.
+    nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f);
+    brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
+    nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
+    brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
+    brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
+    brz->g = (cmph_uint8 **) calloc((size_t)brz->k, sizeof(cmph_uint8 *));
+    DEBUGP("Reading c = %f k = %u algo = %u \n", brz->c, brz->k, brz->algo);
+    //loading h_i1, h_i2 and g_i.
+    for(i = 0; i < brz->k; i++)
+    {
+        // h1
+        nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+        DEBUGP("Hash state 1 has %u bytes\n", buflen);
+        buf = (char *)malloc((size_t)buflen);
+        nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+        brz->h1[i] = hash_state_load(buf, buflen);
+        free(buf);
+        //h2
+        nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+        DEBUGP("Hash state 2 has %u bytes\n", buflen);
+        buf = (char *)malloc((size_t)buflen);
+        nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+        brz->h2[i] = hash_state_load(buf, buflen);
+        free(buf);
+        switch(brz->algo)
+        {
+            case CMPH_FCH:
+                n = fch_calc_b(brz->c, brz->size[i]);
+                break;
+            case CMPH_BMZ8:
+                n = (cmph_uint32)ceil(brz->c * brz->size[i]);
+                break;
+            default:
+                assert(0);
+                n = 0; // unknown algorithm: read no g bytes
+        }
+        DEBUGP("g_i has %u bytes\n", n);
+        brz->g[i] = (cmph_uint8 *)calloc((size_t)n, sizeof(cmph_uint8));
+        nbytes = fread(brz->g[i], sizeof(cmph_uint8)*n, (size_t)1, f);
+    }
+    //loading h0
+    nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+    DEBUGP("Hash state has %u bytes\n", buflen);
+    buf = (char *)malloc((size_t)buflen);
+    nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+    brz->h0 = hash_state_load(buf, buflen);
+    free(buf);
+
+    //loading m and the vector offset.
+    nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f);
+    brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k);
+    nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f);
+    return;
+}
+
+/* Evaluate a BRZ function whose buckets were built with BMZ8.
+ * h0 selects the bucket; the bucket-local value g[v1] + g[v2] (kept to 8 bits)
+ * is then shifted by the bucket's offset. fingerprint receives the h0 hash vector. */
+static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
+{
+    cmph_uint32 bucket;
+    cmph_uint32 bucket_keys;
+    cmph_uint32 nvertices;
+    cmph_uint32 v1;
+    cmph_uint32 v2;
+    cmph_uint8 local_value;
+
+    hash_vector(brz->h0, key, keylen, fingerprint);
+    bucket = fingerprint[2] % brz->k;
+
+    bucket_keys = brz->size[bucket];
+    nvertices = (cmph_uint32)ceil(brz->c * bucket_keys);
+    v1 = hash(brz->h1[bucket], key, keylen) % nvertices;
+    v2 = hash(brz->h2[bucket], key, keylen) % nvertices;
+
+    // The two vertices must differ: step v2 forward, wrapping to 0.
+    if (v1 == v2)
+    {
+        v2++;
+        if (v2 >= nvertices) v2 = 0;
+    }
+    local_value = (cmph_uint8)(brz->g[bucket][v1] + brz->g[bucket][v2]);
+    DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, v1, v2, bucket);
+    DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[bucket][v1], brz->g[bucket][v2], brz->offset[bucket], brz->m);
+    DEBUGP("Address: %u\n", local_value + brz->offset[bucket]);
+    return (local_value + brz->offset[bucket]);
+}
+
+/* Evaluate a BRZ function whose buckets were built with FCH.
+ * h0 selects the bucket; inside it the value is (h2 + g[mixed h1]) % m, kept
+ * to 8 bits and shifted by the bucket's offset. */
+static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
+{
+    cmph_uint32 bucket;
+    cmph_uint32 bucket_keys;
+    cmph_uint32 nbuckets_fch;
+    double p1;
+    double p2;
+    cmph_uint32 first;
+    cmph_uint32 second;
+    cmph_uint8 local_value = 0;
+
+    hash_vector(brz->h0, key, keylen, fingerprint);
+    bucket = fingerprint[2] % brz->k;
+
+    bucket_keys = brz->size[bucket];
+    nbuckets_fch = fch_calc_b(brz->c, bucket_keys);
+    p1 = fch_calc_p1(bucket_keys);
+    p2 = fch_calc_p2(nbuckets_fch);
+    first = hash(brz->h1[bucket], key, keylen) % bucket_keys;
+    second = hash(brz->h2[bucket], key, keylen) % bucket_keys;
+
+    first = mixh10h11h12(nbuckets_fch, p1, p2, first);
+    local_value = (cmph_uint8)((second + brz->g[bucket][first]) % bucket_keys);
+    return (local_value + brz->offset[bucket]);
+}
+
+/* Look up key in a BRZ function, dispatching on the algorithm used for the buckets.
+ * Any algorithm other than CMPH_FCH / CMPH_BMZ8 trips the assertion and yields 0. */
+cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint32 fingerprint[3];
+    brz_data_t *brz = mphf->data;
+
+    if (brz->algo == CMPH_FCH)
+    {
+        return brz_fch_search(brz, key, keylen, fingerprint);
+    }
+    if (brz->algo == CMPH_BMZ8)
+    {
+        return brz_bmz8_search(brz, key, keylen, fingerprint);
+    }
+    assert(0);
+    return 0;
+}
+/* Release everything owned by a BRZ function, including mphf itself.
+ * The per-bucket arrays (g, h1, h2) are only walked when g is non-NULL. */
+void brz_destroy(cmph_t *mphf)
+{
+    brz_data_t *brz = (brz_data_t *)mphf->data;
+
+    if (brz->g != NULL)
+    {
+        cmph_uint32 bucket = 0;
+        while (bucket < brz->k)
+        {
+            free(brz->g[bucket]);
+            hash_state_destroy(brz->h1[bucket]);
+            hash_state_destroy(brz->h2[bucket]);
+            bucket++;
+        }
+        free(brz->g);
+        free(brz->h1);
+        free(brz->h2);
+    }
+    hash_state_destroy(brz->h0);
+    free(brz->size);
+    free(brz->offset);
+    free(brz);
+    free(mphf);
+}
+
+/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ *
+ * Layout written here: algo, h0 type, h0 state, k, c (raw bytes of the double),
+ * h1 type, h2 type, size[k], offset[k], a table of k bucket pointers, and then
+ * for every bucket its packed h1, packed h2 and g array.
+ * The pointer table holds absolute addresses inside packed_mphf, so the packed
+ * image is only usable at the address it was packed to.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void brz_pack(cmph_t *mphf, void *packed_mphf)
+{
+    brz_data_t *data = (brz_data_t *)mphf->data;
+    cmph_uint8 * ptr = packed_mphf;
+    cmph_uint32 i, n = 0;
+
+    // packing internal algo type
+    memcpy(ptr, &(data->algo), sizeof(data->algo));
+    ptr += sizeof(data->algo);
+
+    // packing h0 type
+    CMPH_HASH h0_type = hash_get_type(data->h0);
+    memcpy(ptr, &h0_type, sizeof(h0_type));
+    ptr += sizeof(h0_type);
+
+    // packing h0
+    hash_state_pack(data->h0, ptr);
+    ptr += hash_state_packed_size(h0_type);
+
+    // packing k
+    memcpy(ptr, &(data->k), sizeof(data->k));
+    ptr += sizeof(data->k);
+
+    // packing c: copy the bytes of the double. A cast to an integer type
+    // would drop the fractional part (1.15 would be stored as 1).
+    memcpy(ptr, &(data->c), sizeof(data->c));
+    ptr += sizeof(data->c);
+
+    // packing h1 type
+    CMPH_HASH h1_type = hash_get_type(data->h1[0]);
+    memcpy(ptr, &h1_type, sizeof(h1_type));
+    ptr += sizeof(h1_type);
+
+    // packing h2 type
+    CMPH_HASH h2_type = hash_get_type(data->h2[0]);
+    memcpy(ptr, &h2_type, sizeof(h2_type));
+    ptr += sizeof(h2_type);
+
+    // packing size
+    memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
+    ptr += data->k;
+
+    // packing offset
+    memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
+    ptr += sizeof(cmph_uint32)*data->k;
+
+    // Table of k bucket pointers, each sizeof(cmph_uint8 *) bytes wide on
+    // every platform; written with memcpy because ptr may be unaligned.
+    cmph_uint8 * g_is_ptr = ptr;
+    cmph_uint8 * g_i = g_is_ptr + sizeof(cmph_uint8 *)*data->k;
+
+    for(i = 0; i < data->k; i++)
+    {
+        memcpy(g_is_ptr, &g_i, sizeof(g_i));
+        g_is_ptr += sizeof(g_i);
+
+        // packing h1[i]
+        hash_state_pack(data->h1[i], g_i);
+        g_i += hash_state_packed_size(h1_type);
+
+        // packing h2[i]
+        hash_state_pack(data->h2[i], g_i);
+        g_i += hash_state_packed_size(h2_type);
+
+        // packing g_i
+        n = 0;
+        switch(data->algo)
+        {
+            case CMPH_FCH:
+                n = fch_calc_b(data->c, data->size[i]);
+                break;
+            case CMPH_BMZ8:
+                n = (cmph_uint32)ceil(data->c * data->size[i]);
+                break;
+            default: assert(0);
+        }
+        memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
+        g_i += n;
+    }
+}
+
+/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ *
+ * The count includes two CMPH_ALGO fields although brz_pack() writes only one.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 brz_packed_size(cmph_t *mphf)
+{
+    cmph_uint32 i;
+    cmph_uint32 size = 0;
+    brz_data_t *data = (brz_data_t *)mphf->data;
+    CMPH_HASH h0_type = hash_get_type(data->h0);
+    CMPH_HASH h1_type = hash_get_type(data->h1[0]);
+    CMPH_HASH h2_type = hash_get_type(data->h2[0]);
+    size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
+            sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
+    // pointers to g_is: one native pointer per bucket, matching brz_pack()
+    size += (cmph_uint32)(sizeof(cmph_uint8 *)*data->k);
+
+    size += hash_state_packed_size(h1_type) * data->k;
+    size += hash_state_packed_size(h2_type) * data->k;
+
+    cmph_uint32 n = 0;
+    for(i = 0; i < data->k; i++)
+    {
+        n = 0;
+        switch(data->algo)
+        {
+            case CMPH_FCH:
+                n = fch_calc_b(data->c, data->size[i]);
+                break;
+            case CMPH_BMZ8:
+                n = (cmph_uint32)ceil(data->c * data->size[i]);
+                break;
+            default: assert(0);
+        }
+        size += n;
+    }
+    return size;
+}
+
+
+
+/* Packed-image counterpart of brz_bmz8_search(). packed_mphf points just past
+ * the internal algo field written by brz_pack(). */
+static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
+{
+    register CMPH_HASH h0_type = *packed_mphf++;
+    register cmph_uint32 *h0_ptr = packed_mphf;
+    packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
+
+    register cmph_uint32 k = *packed_mphf++;
+
+    // c is stored as the raw bytes of a double (see brz_pack)
+    double c;
+    memcpy(&c, packed_mphf, sizeof(c));
+    packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + sizeof(c));
+
+    register CMPH_HASH h1_type = *packed_mphf++;
+
+    register CMPH_HASH h2_type = *packed_mphf++;
+
+    register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
+    packed_mphf = (cmph_uint32 *)(size + k);
+
+    register cmph_uint32 * offset = packed_mphf;
+    packed_mphf += k;
+
+    register cmph_uint32 h0;
+
+    hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
+    h0 = fingerprint[2] % k;
+
+    register cmph_uint32 m = size[h0];
+    register cmph_uint32 n = (cmph_uint32)ceil(c * m);
+
+    // Fetch the start of bucket h0 from the pointer table that follows offset[].
+    cmph_uint8 * h1_ptr;
+    memcpy(&h1_ptr, ((cmph_uint8 *)packed_mphf) + sizeof(h1_ptr)*h0, sizeof(h1_ptr));
+
+    register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
+
+    register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
+
+    register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
+    register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
+
+    register cmph_uint8 mphf_bucket;
+
+    if (h1 == h2 && ++h2 >= n) h2 = 0;
+    mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
+    DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
+    DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
+    return (mphf_bucket + offset[h0]);
+}
+
+/* Packed-image counterpart of brz_fch_search(). packed_mphf points just past
+ * the internal algo field written by brz_pack(). */
+static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
+{
+    register CMPH_HASH h0_type = *packed_mphf++;
+
+    register cmph_uint32 *h0_ptr = packed_mphf;
+    packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
+
+    register cmph_uint32 k = *packed_mphf++;
+
+    // c is stored as the raw bytes of a double (see brz_pack)
+    double c;
+    memcpy(&c, packed_mphf, sizeof(c));
+    packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + sizeof(c));
+
+    register CMPH_HASH h1_type = *packed_mphf++;
+
+    register CMPH_HASH h2_type = *packed_mphf++;
+
+    register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
+    packed_mphf = (cmph_uint32 *)(size + k);
+
+    register cmph_uint32 * offset = packed_mphf;
+    packed_mphf += k;
+
+    register cmph_uint32 h0;
+
+    hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
+    h0 = fingerprint[2] % k;
+
+    register cmph_uint32 m = size[h0];
+    register cmph_uint32 b = fch_calc_b(c, m);
+    register double p1 = fch_calc_p1(m);
+    register double p2 = fch_calc_p2(b);
+
+    // Fetch the start of bucket h0 from the pointer table that follows offset[].
+    cmph_uint8 * h1_ptr;
+    memcpy(&h1_ptr, ((cmph_uint8 *)packed_mphf) + sizeof(h1_ptr)*h0, sizeof(h1_ptr));
+
+    register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
+
+    register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
+
+    register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
+    register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
+
+    register cmph_uint8 mphf_bucket = 0;
+    h1 = mixh10h11h12(b, p1, p2, h1);
+    mphf_bucket = (cmph_uint8)((h2 + g[h1]) % m);
+    return (mphf_bucket + offset[h0]);
+}
+
+/** \fn cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value, or 0 when the packed algorithm is neither CMPH_FCH nor CMPH_BMZ8
+ */
+cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
+    register CMPH_ALGO algo = *ptr++;
+    cmph_uint32 fingerprint[3];
+    switch(algo)
+    {
+        case CMPH_FCH:
+            return brz_fch_search_packed(ptr, key, keylen, fingerprint);
+        case CMPH_BMZ8:
+            return brz_bmz8_search_packed(ptr, key, keylen, fingerprint);
+        default: assert(0);
+    }
+    // Reached only when assert() is compiled out; mirrors brz_search().
+    return 0;
+}
+
diff --git a/girepository/cmph/brz.h b/girepository/cmph/brz.h
new file mode 100644
index 00000000..ac07ed76
--- /dev/null
+++ b/girepository/cmph/brz.h
@@ -0,0 +1,47 @@
+#ifndef __CMPH_BRZ_H__
+#define __CMPH_BRZ_H__
+
+#include "cmph.h"
+
+typedef struct __brz_data_t brz_data_t;
+typedef struct __brz_config_data_t brz_config_data_t;
+
+brz_config_data_t *brz_config_new();
+void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
+void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
+void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b);
+void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
+void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
+void brz_config_destroy(cmph_config_t *mph);
+cmph_t *brz_new(cmph_config_t *mph, double c);
+
+void brz_load(FILE *f, cmph_t *mphf);
+int brz_dump(cmph_t *mphf, FILE *f);
+void brz_destroy(cmph_t *mphf);
+cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void brz_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 brz_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/brz_structs.h b/girepository/cmph/brz_structs.h
new file mode 100755
index 00000000..b781107f
--- /dev/null
+++ b/girepository/cmph/brz_structs.h
@@ -0,0 +1,39 @@
+#ifndef __CMPH_BRZ_STRUCTS_H__
+#define __CMPH_BRZ_STRUCTS_H__
+
+#include "hash_state.h"
+
+/* In-memory form of a BRZ function: the hash h0 selects one of k buckets and
+ * each bucket i carries its own small function (h1[i], h2[i], g[i]). */
+struct __brz_data_t
+{
+ CMPH_ALGO algo; // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8)
+ cmph_uint32 m; // edges (words) count
+ double c; // constant c; with size[i] it determines the length of g[i]
+ cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
+ cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1].
+ cmph_uint8 **g; // g function: g[i] is the table of bucket i.
+ cmph_uint32 k; // number of components (buckets)
+ hash_state_t **h1; // h1[i]: first hash function of bucket i
+ hash_state_t **h2; // h2[i]: second hash function of bucket i
+ hash_state_t * h0; // hash whose third fingerprint word, modulo k, selects the bucket
+};
+
+/* Construction-time state for the BRZ algorithm. The fields shared with
+ * __brz_data_t carry the same meaning; the remaining ones hold the settings
+ * supplied through the brz_config_set_*() functions declared in brz.h. */
+struct __brz_config_data_t
+{
+ CMPH_HASH hashfuncs[3]; // hash function types, see brz_config_set_hashfuncs()
+ CMPH_ALGO algo; // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8)
+ double c; // constant c
+ cmph_uint32 m; // edges (words) count
+ cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
+ cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1].
+ cmph_uint8 **g; // g function.
+ cmph_uint8 b; // parameter b.
+ cmph_uint32 k; // number of components
+ hash_state_t **h1;
+ hash_state_t **h2;
+ hash_state_t * h0;
+ cmph_uint32 memory_availability; // see brz_config_set_memory_availability(); units not visible here
+ cmph_uint8 * tmp_dir; // temporary directory
+ FILE * mphf_fd; // mphf file
+};
+
+#endif
diff --git a/girepository/cmph/buffer_entry.c b/girepository/cmph/buffer_entry.c
new file mode 100644
index 00000000..7f82aae1
--- /dev/null
+++ b/girepository/cmph/buffer_entry.c
@@ -0,0 +1,103 @@
+#include "buffer_entry.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* A read-only window over one file: capacity bytes are loaded at a time and
+ * keys are consumed from the window through pos. */
+struct __buffer_entry_t
+{
+ FILE *fd; // stream set by buffer_entry_open(); NULL until then
+ cmph_uint8 * buff; // current window contents; NULL before the first load
+ cmph_uint32 capacity, // buffer entry capacity
+ nbytes, // buffer entry used bytes
+ pos; // current read position in buffer entry
+ cmph_uint8 eof; // flag to indicate end of file
+};
+
+/* Allocate a buffer entry with the given capacity and no file attached.
+ * pos and nbytes both start at capacity, i.e. the window looks fully consumed,
+ * so the first key read has to load data from the file. */
+buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
+{
+    buffer_entry_t *entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t));
+
+    assert(entry);
+    entry->capacity = capacity;
+    entry->nbytes   = capacity;
+    entry->pos      = capacity;
+    entry->eof      = 0;
+    entry->buff     = NULL;
+    entry->fd       = NULL;
+    return entry;
+}
+
+/* Attach filename, opened for binary reading, to the entry.
+ * The fopen() result is stored as is, so fd is NULL when the open fails. */
+void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename)
+{
+    FILE *stream = fopen(filename, "rb");
+
+    buffer_entry->fd = stream;
+}
+
+/* Set the number of bytes the next load will request. */
+void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity)
+{
+    (*buffer_entry).capacity = capacity;
+}
+
+
+/* Return the number of bytes a load requests. */
+cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry)
+{
+    cmph_uint32 current = buffer_entry->capacity;
+
+    return current;
+}
+
+/* Replace the window with the next capacity bytes of the file.
+ * A short read sets the eof flag; the read position restarts at 0. */
+void buffer_entry_load(buffer_entry_t * buffer_entry)
+{
+    size_t wanted = (size_t)buffer_entry->capacity;
+    size_t got;
+
+    free(buffer_entry->buff);
+    buffer_entry->buff = (cmph_uint8 *)calloc(wanted, sizeof(cmph_uint8));
+    got = fread(buffer_entry->buff, (size_t)1, wanted, buffer_entry->fd);
+    buffer_entry->nbytes = (cmph_uint32)got;
+    if (buffer_entry->nbytes != buffer_entry->capacity)
+    {
+        buffer_entry->eof = 1;
+    }
+    buffer_entry->pos = 0;
+}
+
+/* Copy exactly count bytes from the entry into dst, reloading the window as
+ * often as needed. Returns 1 on success and 0 when the file ends first. */
+static int buffer_entry_copy_bytes(buffer_entry_t * buffer_entry, cmph_uint8 * dst, cmph_uint32 count)
+{
+    while (count > 0)
+    {
+        cmph_uint32 available = buffer_entry->nbytes - buffer_entry->pos;
+        cmph_uint32 chunk;
+
+        if (available == 0)
+        {
+            if (buffer_entry->eof) return 0;
+            buffer_entry_load(buffer_entry);
+            // An empty load cannot make progress (end of file or capacity 0).
+            if (buffer_entry->nbytes == 0) return 0;
+            continue;
+        }
+        chunk = (count < available) ? count : available;
+        memcpy(dst, buffer_entry->buff + buffer_entry->pos, (size_t)chunk);
+        buffer_entry->pos += chunk;
+        dst += chunk;
+        count -= chunk;
+    }
+    return 1;
+}
+
+/* Read the next length-prefixed key from the entry.
+ *
+ * The record is a cmph_uint32 length followed by that many key bytes. The
+ * returned buffer holds the same record (length first, then the key bytes)
+ * and must be released with free(). *keylen receives the key length.
+ *
+ * Returns NULL at end of file, when the file ends in the middle of a record,
+ * or when the buffer cannot be allocated; *keylen is unspecified in the
+ * latter two cases.
+ */
+cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen)
+{
+    cmph_uint8 * buf = NULL;
+
+    if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end
+    {
+        return NULL;
+    }
+    // The length may straddle a window boundary; it is assembled byte-wise
+    // (offsets into *keylen must be in bytes, not in cmph_uint32 units).
+    if (!buffer_entry_copy_bytes(buffer_entry, (cmph_uint8 *)keylen, (cmph_uint32)sizeof(*keylen)))
+    {
+        return NULL;
+    }
+
+    buf = (cmph_uint8 *)malloc((size_t)*keylen + sizeof(*keylen));
+    if (buf == NULL)
+    {
+        return NULL;
+    }
+    memcpy(buf, keylen, sizeof(*keylen));
+    if (!buffer_entry_copy_bytes(buffer_entry, buf + sizeof(*keylen), *keylen))
+    {
+        free(buf);
+        return NULL;
+    }
+    return buf;
+}
+
+/* Close the entry's file (if one was opened), release its window and free the
+ * entry itself. The pointer must not be used afterwards. */
+void buffer_entry_destroy(buffer_entry_t * buffer_entry)
+{
+    // fd stays NULL when buffer_entry_open() was never called or fopen()
+    // failed; fclose() must not be given a null stream.
+    if (buffer_entry->fd != NULL)
+    {
+        fclose(buffer_entry->fd);
+    }
+    buffer_entry->fd = NULL;
+    free(buffer_entry->buff);
+    buffer_entry->buff = NULL;
+    buffer_entry->capacity = 0;
+    buffer_entry->nbytes = 0;
+    buffer_entry->pos = 0;
+    buffer_entry->eof = 0;
+    free(buffer_entry);
+}
diff --git a/girepository/cmph/buffer_entry.h b/girepository/cmph/buffer_entry.h
new file mode 100644
index 00000000..62102bab
--- /dev/null
+++ b/girepository/cmph/buffer_entry.h
@@ -0,0 +1,14 @@
+#ifndef __CMPH_BUFFER_ENTRY_H__
+#define __CMPH_BUFFER_ENTRY_H__
+
+#include "cmph_types.h"
+#include <stdio.h>
+typedef struct __buffer_entry_t buffer_entry_t;
+
+buffer_entry_t * buffer_entry_new(cmph_uint32 capacity);
+void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity);
+cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry);
+void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename);
+cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen);
+void buffer_entry_destroy(buffer_entry_t * buffer_entry);
+#endif
diff --git a/girepository/cmph/buffer_manage.c b/girepository/cmph/buffer_manage.c
new file mode 100644
index 00000000..fdefc620
--- /dev/null
+++ b/girepository/cmph/buffer_manage.c
@@ -0,0 +1,66 @@
+#include "buffer_manage.h"
+#include "buffer_entry.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+/* Shares a memory budget among nentries buffer entries. The capacity of an
+ * exhausted entry is pushed on memory_avail_list and handed to the next entry
+ * that is read. */
+struct __buffer_manage_t
+{
+ cmph_uint32 memory_avail; // memory available
+ buffer_entry_t ** buffer_entries; // buffer entries to be managed
+ cmph_uint32 nentries; // number of entries to be managed
+ cmph_uint32 *memory_avail_list; // memory available list
+ int pos_avail_list; // current position in memory available list (-1 when the list is empty)
+};
+
+/* Create a manager for nentries buffer entries; each entry starts with a
+ * capacity of memory_avail / nentries + 1 bytes. nentries must be non-zero. */
+buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentries)
+{
+    cmph_uint32 per_entry;
+    cmph_uint32 idx = 0;
+    buffer_manage_t *mgr = (buffer_manage_t *)malloc(sizeof(buffer_manage_t));
+
+    assert(mgr);
+    mgr->memory_avail = memory_avail;
+    mgr->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
+    mgr->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
+    mgr->pos_avail_list = -1;
+    mgr->nentries = nentries;
+
+    per_entry = mgr->memory_avail/mgr->nentries + 1;
+    while (idx < mgr->nentries)
+    {
+        mgr->buffer_entries[idx] = buffer_entry_new(per_entry);
+        idx++;
+    }
+    return mgr;
+}
+
+/* Attach filename to the entry at position index. */
+void buffer_manage_open(buffer_manage_t * buffer_manage, cmph_uint32 index, char * filename)
+{
+    buffer_entry_t *entry = buffer_manage->buffer_entries[index];
+
+    buffer_entry_open(entry, filename);
+}
+
+/* Read the next key of entry index.
+ *
+ * Before reading, capacity released by an exhausted entry (if any) is added to
+ * this entry. When the entry yields no key, its capacity is recorded so that
+ * a later read can reuse it.
+ * Returns the buffer produced by buffer_entry_read_key() (key length followed
+ * by the key bytes), or NULL when the entry has no more keys.
+ */
+cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32 index)
+{
+    cmph_uint8 * key = NULL;
+    // buffer_entry_read_key() requires an output length; the same value is
+    // stored at the start of the returned buffer, so it is not passed on.
+    cmph_uint32 keylen = 0;
+    if (buffer_manage->pos_avail_list >= 0 ) // recovering memory
+    {
+        cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manage->buffer_entries[index]) + buffer_manage->memory_avail_list[(buffer_manage->pos_avail_list)--];
+        buffer_entry_set_capacity(buffer_manage->buffer_entries[index], new_capacity);
+        //fprintf(stderr, "recovering memory\n");
+    }
+    key = buffer_entry_read_key(buffer_manage->buffer_entries[index], &keylen);
+    if (key == NULL) // storing memory to be recovered
+    {
+        buffer_manage->memory_avail_list[++(buffer_manage->pos_avail_list)] = buffer_entry_get_capacity(buffer_manage->buffer_entries[index]);
+        //fprintf(stderr, "storing memory to be recovered\n");
+    }
+    return key;
+}
+
+/* Destroy every managed entry, then the manager's own arrays and the manager. */
+void buffer_manage_destroy(buffer_manage_t * buffer_manage)
+{
+    cmph_uint32 idx = 0;
+
+    while (idx < buffer_manage->nentries)
+    {
+        buffer_entry_destroy(buffer_manage->buffer_entries[idx]);
+        idx++;
+    }
+    free(buffer_manage->memory_avail_list);
+    free(buffer_manage->buffer_entries);
+    free(buffer_manage);
+}
diff --git a/girepository/cmph/buffer_manage.h b/girepository/cmph/buffer_manage.h
new file mode 100644
index 00000000..8c66cffc
--- /dev/null
+++ b/girepository/cmph/buffer_manage.h
@@ -0,0 +1,12 @@
+#ifndef __CMPH_BUFFER_MANAGE_H__
+#define __CMPH_BUFFER_MANAGE_H__
+
+#include "cmph_types.h"
+#include <stdio.h>
+typedef struct __buffer_manage_t buffer_manage_t;
+
+buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentries);
+void buffer_manage_open(buffer_manage_t * buffer_manage, cmph_uint32 index, char * filename);
+cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32 index);
+void buffer_manage_destroy(buffer_manage_t * buffer_manage);
+#endif
diff --git a/girepository/cmph/buffer_manager.c b/girepository/cmph/buffer_manager.c
new file mode 100644
index 00000000..5a051e2f
--- /dev/null
+++ b/girepository/cmph/buffer_manager.c
@@ -0,0 +1,64 @@
+#include "buffer_manager.h"
+#include "buffer_entry.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+/* Shares a memory budget among nentries buffer entries. The capacity of an
+ * exhausted entry is pushed on memory_avail_list and handed to the next entry
+ * that is read. */
+struct __buffer_manager_t
+{
+ cmph_uint32 memory_avail; // memory available
+ buffer_entry_t ** buffer_entries; // buffer entries to be managed
+ cmph_uint32 nentries; // number of entries to be managed
+ cmph_uint32 *memory_avail_list; // memory available list
+ int pos_avail_list; // current position in memory available list (-1 when the list is empty)
+};
+
+/* Create a manager for nentries buffer entries; each entry starts with a
+ * capacity of memory_avail / nentries + 1 bytes. nentries must be non-zero. */
+buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries)
+{
+    cmph_uint32 per_entry;
+    cmph_uint32 idx = 0;
+    buffer_manager_t *mgr = (buffer_manager_t *)malloc(sizeof(buffer_manager_t));
+
+    assert(mgr);
+    mgr->memory_avail = memory_avail;
+    mgr->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
+    mgr->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
+    mgr->pos_avail_list = -1;
+    mgr->nentries = nentries;
+
+    per_entry = mgr->memory_avail/mgr->nentries + 1;
+    while (idx < mgr->nentries)
+    {
+        mgr->buffer_entries[idx] = buffer_entry_new(per_entry);
+        idx++;
+    }
+    return mgr;
+}
+
+/* Attach filename to the entry at position index. */
+void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename)
+{
+    buffer_entry_t *entry = buffer_manager->buffer_entries[index];
+
+    buffer_entry_open(entry, filename);
+}
+
+/* Read the next key of entry index; *keylen is filled in by buffer_entry_read_key().
+ * Capacity released by an exhausted entry is first added to this entry; when
+ * this entry yields no key, its capacity is recorded for later reuse.
+ * Returns the key buffer, or NULL when the entry has no more keys. */
+cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen)
+{
+    buffer_entry_t *entry = buffer_manager->buffer_entries[index];
+    cmph_uint8 * result;
+
+    if (buffer_manager->pos_avail_list >= 0) // recovering memory
+    {
+        cmph_uint32 reclaimed = buffer_manager->memory_avail_list[buffer_manager->pos_avail_list];
+        buffer_manager->pos_avail_list--;
+        buffer_entry_set_capacity(entry, buffer_entry_get_capacity(entry) + reclaimed);
+    }
+
+    result = buffer_entry_read_key(entry, keylen);
+    if (result == NULL) // storing memory to be recovered
+    {
+        buffer_manager->pos_avail_list++;
+        buffer_manager->memory_avail_list[buffer_manager->pos_avail_list] = buffer_entry_get_capacity(entry);
+    }
+    return result;
+}
+
+/* Destroy every managed entry, then the manager's own arrays and the manager. */
+void buffer_manager_destroy(buffer_manager_t * buffer_manager)
+{
+    cmph_uint32 idx = 0;
+
+    while (idx < buffer_manager->nentries)
+    {
+        buffer_entry_destroy(buffer_manager->buffer_entries[idx]);
+        idx++;
+    }
+    free(buffer_manager->memory_avail_list);
+    free(buffer_manager->buffer_entries);
+    free(buffer_manager);
+}
diff --git a/girepository/cmph/buffer_manager.h b/girepository/cmph/buffer_manager.h
new file mode 100644
index 00000000..af99c20f
--- /dev/null
+++ b/girepository/cmph/buffer_manager.h
@@ -0,0 +1,12 @@
+/* The guard is distinct from the one in buffer_manage.h, so that including
+ * both headers does not hide the declarations below. */
+#ifndef __CMPH_BUFFER_MANAGER_H__
+#define __CMPH_BUFFER_MANAGER_H__
+
+#include "cmph_types.h"
+#include <stdio.h>
+typedef struct __buffer_manager_t buffer_manager_t;
+
+buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries);
+void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename);
+cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen);
+void buffer_manager_destroy(buffer_manager_t * buffer_manager);
+#endif
diff --git a/girepository/cmph/chd.c b/girepository/cmph/chd.c
new file mode 100644
index 00000000..7fb3b8bb
--- /dev/null
+++ b/girepository/cmph/chd.c
@@ -0,0 +1,271 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<math.h>
+#include<time.h>
+#include<assert.h>
+#include<limits.h>
+
+#include "cmph_structs.h"
+#include "chd_structs.h"
+#include "chd.h"
+#include "bitbool.h"
+//#define DEBUG
+#include "debug.h"
+
+/* Create the CHD configuration: a zero-filled chd_config_data_t that owns a
+ * nested configuration for CMPH_CHD_PH over the same key source as mph. */
+chd_config_data_t *chd_config_new(cmph_config_t *mph)
+{
+    chd_config_data_t *config = (chd_config_data_t *)calloc((size_t)1, sizeof(chd_config_data_t));
+
+    assert(config);
+    config->chd_ph = cmph_config_new(mph->key_source);
+    cmph_config_set_algo(config->chd_ph, CMPH_CHD_PH);
+    return config;
+}
+
+/* Release the CHD-specific configuration stored in mph->data, including the
+ * nested CHD_PH configuration when one is present. */
+void chd_config_destroy(cmph_config_t *mph)
+{
+    chd_config_data_t *config = (chd_config_data_t *) mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    if (config->chd_ph != NULL)
+    {
+        cmph_config_destroy(config->chd_ph);
+        config->chd_ph = NULL;
+    }
+    free(config);
+}
+
+
+/* Forward the hash function selection to the nested CHD_PH configuration. */
+void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    cmph_config_t *inner = ((chd_config_data_t *) mph->data)->chd_ph;
+
+    cmph_config_set_hashfuncs(inner, hashfuncs);
+}
+
+
+/* Forward the keys-per-bucket setting to the nested CHD_PH configuration. */
+void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
+{
+    cmph_config_t *inner = ((chd_config_data_t *) mph->data)->chd_ph;
+
+    cmph_config_set_b(inner, keys_per_bucket);
+}
+
+
+/* Forward the keys-per-bin setting to the nested CHD_PH configuration. */
+void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
+{
+    cmph_config_t *inner = ((chd_config_data_t *) mph->data)->chd_ph;
+
+    cmph_config_set_keys_per_bin(inner, keys_per_bin);
+}
+
+
+/* Build a CHD function.
+ *
+ * Steps: build a CHD_PH function with the nested configuration and keep only
+ * its packed form; collect the indices of the bins whose occupancy bit is
+ * clear; build and pack a compressed-rank structure over those indices; store
+ * both packed blobs in a new chd_data_t.
+ * Returns the new function, or NULL when the CHD_PH construction fails.
+ * NOTE(review): the results of calloc/malloc are not checked here.
+ */
+cmph_t *chd_new(cmph_config_t *mph, double c)
+{
+ cmph_t *mphf = NULL;
+ chd_data_t *chdf = NULL;
+ chd_config_data_t *chd = (chd_config_data_t *)mph->data;
+ chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
+ compressed_rank_t cr;
+
+ register cmph_t * chd_phf = NULL;
+ register cmph_uint32 packed_chd_phf_size = 0;
+ cmph_uint8 * packed_chd_phf = NULL;
+
+ register cmph_uint32 packed_cr_size = 0;
+ cmph_uint8 * packed_cr = NULL;
+
+ register cmph_uint32 i, idx, nkeys, nvals, nbins;
+ cmph_uint32 * vals_table = NULL;
+ register cmph_uint32 * occup_table = NULL;
+ #ifdef CMPH_TIMING
+ double construction_time_begin = 0.0;
+ double construction_time = 0.0;
+ ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
+ #endif
+
+ // The nested CHD_PH configuration inherits verbosity and the load factor c.
+ cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
+ cmph_config_set_graphsize(chd->chd_ph, c);
+
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
+ }
+
+ chd_phf = cmph_new(chd->chd_ph);
+
+ if(chd_phf == NULL)
+ {
+ return NULL;
+ }
+
+ packed_chd_phf_size = cmph_packed_size(chd_phf);
+ DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
+
+ /* Make sure that we have enough space to pack the mphf. */
+ packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
+
+ /* Pack the mphf. */
+ cmph_pack(chd_phf, packed_chd_phf);
+
+ // Only the packed copy is kept from here on.
+ cmph_destroy(chd_phf);
+
+
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
+ }
+
+ compressed_rank_init(&cr);
+ nbins = chd_ph->n;
+ nkeys = chd_ph->m;
+ nvals = nbins - nkeys; // number of bins left without a key
+
+ vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
+ occup_table = (cmph_uint32 *)chd_ph->occup_table;
+
+ // Collect, in increasing order, the bins whose occupancy bit is clear.
+ for(i = 0, idx = 0; i < nbins; i++)
+ {
+ if(!GETBIT32(occup_table, i))
+ {
+ vals_table[idx++] = i;
+ }
+ }
+
+ compressed_rank_generate(&cr, vals_table, nvals);
+ free(vals_table);
+
+ packed_cr_size = compressed_rank_packed_size(&cr);
+ packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
+ compressed_rank_pack(&cr, packed_cr);
+ compressed_rank_destroy(&cr);
+
+ mphf = (cmph_t *)malloc(sizeof(cmph_t));
+ mphf->algo = mph->algo;
+ chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
+
+ chdf->packed_cr = packed_cr;
+ packed_cr = NULL; //transfer memory ownership
+
+ chdf->packed_chd_phf = packed_chd_phf;
+ packed_chd_phf = NULL; //transfer memory ownership
+
+ chdf->packed_chd_phf_size = packed_chd_phf_size;
+ chdf->packed_cr_size = packed_cr_size;
+
+ mphf->data = chdf;
+ mphf->size = nkeys;
+
+ DEBUGP("Successfully generated minimal perfect hash\n");
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+ }
+ #ifdef CMPH_TIMING
+ ELAPSED_TIME_IN_SECONDS(&construction_time);
+ register cmph_uint32 space_usage = chd_packed_size(mphf)*8;
+ construction_time = construction_time - construction_time_begin;
+ fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
+ #endif
+
+ return mphf;
+}
+
+/* Read a CHD function from fd into a new chd_data_t stored in mphf->data.
+ * The stream holds two length-prefixed blobs: the packed CHD_PH function
+ * followed by the packed compressed-rank structure. */
+void chd_load(FILE *fd, cmph_t *mphf)
+{
+    chd_data_t *loaded = (chd_data_t *)malloc(sizeof(chd_data_t));
+    register size_t nbytes;
+
+    DEBUGP("Loading chd mphf\n");
+    mphf->data = loaded;
+
+    // packed CHD_PH function
+    nbytes = fread(&loaded->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd);
+    DEBUGP("Loading CHD_PH perfect hash function with %u bytes to disk\n", loaded->packed_chd_phf_size);
+    loaded->packed_chd_phf = (cmph_uint8 *) calloc((size_t)loaded->packed_chd_phf_size,(size_t)1);
+    nbytes = fread(loaded->packed_chd_phf, loaded->packed_chd_phf_size, (size_t)1, fd);
+
+    // packed compressed-rank structure
+    nbytes = fread(&loaded->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
+    DEBUGP("Loading Compressed rank structure, which has %u bytes\n", loaded->packed_cr_size);
+    loaded->packed_cr = (cmph_uint8 *) calloc((size_t)loaded->packed_cr_size, (size_t)1);
+    nbytes = fread(loaded->packed_cr, loaded->packed_cr_size, (size_t)1, fd);
+}
+
+/* Write a CHD function to fd: the common header via __cmph_dump(), then the
+ * packed CHD_PH function and the packed compressed-rank structure, each
+ * preceded by its size.
+ * Returns 1 when every write made here succeeded, 0 otherwise. */
+int chd_dump(cmph_t *mphf, FILE *fd)
+{
+    chd_data_t *data = (chd_data_t *)mphf->data;
+    int ok = 1;
+
+    __cmph_dump(mphf, fd);
+    // Dumping CHD_PH perfect hash function
+
+    DEBUGP("Dumping CHD_PH perfect hash function with %u bytes to disk\n", data->packed_chd_phf_size);
+    if (fwrite(&data->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd) != 1) ok = 0;
+    // fwrite returns 0 for a zero-sized item, so only non-empty blobs are checked.
+    if (data->packed_chd_phf_size > 0 && fwrite(data->packed_chd_phf, data->packed_chd_phf_size, (size_t)1, fd) != 1) ok = 0;
+
+    DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", data->packed_cr_size);
+    if (fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd) != 1) ok = 0;
+    if (data->packed_cr_size > 0 && fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd) != 1) ok = 0;
+
+    return ok;
+}
+
+/* Free both packed blobs, the chd_data_t that holds them, and mphf itself. */
+void chd_destroy(cmph_t *mphf)
+{
+    chd_data_t *chd = (chd_data_t *)mphf->data;
+    cmph_uint8 *phf_blob = chd->packed_chd_phf;
+    cmph_uint8 *rank_blob = chd->packed_cr;
+
+    free(phf_blob);
+    free(rank_blob);
+    free(chd);
+    free(mphf);
+}
+
+/* Shared lookup: find the key's bin with the packed CHD_PH function, then
+ * subtract the value the packed compressed-rank structure reports for that bin. */
+static inline cmph_uint32 _chd_search(void * packed_chd_phf, void * packed_cr, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint32 bin = cmph_search_packed(packed_chd_phf, key, keylen);
+
+    return bin - compressed_rank_query_packed(packed_cr, bin);
+}
+
+/* Look up key using the two packed blobs held in mphf->data. */
+cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    chd_data_t * data = mphf->data;
+    void * phf_blob = data->packed_chd_phf;
+    void * rank_blob = data->packed_cr;
+
+    return _chd_search(phf_blob, rank_blob, key, keylen);
+}
+
+/* Serialize the function into packed_mphf as:
+ * packed_cr_size, packed_cr bytes, packed_chd_phf_size, packed_chd_phf bytes. */
+void chd_pack(cmph_t *mphf, void *packed_mphf)
+{
+    chd_data_t *chd = (chd_data_t *)mphf->data;
+    cmph_uint8 * cursor = (cmph_uint8 *)packed_mphf;
+
+    // compressed-rank blob, preceded by its size
+    *((cmph_uint32 *)cursor) = chd->packed_cr_size;
+    cursor += sizeof(cmph_uint32);
+    memcpy(cursor, chd->packed_cr, chd->packed_cr_size);
+    cursor += chd->packed_cr_size;
+
+    // CHD_PH blob, preceded by its size
+    *((cmph_uint32 *)cursor) = chd->packed_chd_phf_size;
+    cursor += sizeof(cmph_uint32);
+    memcpy(cursor, chd->packed_chd_phf, chd->packed_chd_phf_size);
+}
+
+/* Bytes needed for the packed function: one CMPH_ALGO field, the two size
+ * fields, and the two blobs. */
+cmph_uint32 chd_packed_size(cmph_t *mphf)
+{
+    chd_data_t *chd = (chd_data_t *)mphf->data;
+    size_t fixed_part = sizeof(CMPH_ALGO) + 2*sizeof(cmph_uint32);
+
+    return (cmph_uint32)(fixed_part + chd->packed_cr_size + chd->packed_chd_phf_size);
+}
+
+/* Look up key in the layout written by chd_pack(): skip the leading size to
+ * reach the compressed-rank blob, then skip that blob and the second size
+ * field to reach the packed CHD_PH function. */
+cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint32 * words = packed_mphf;
+    cmph_uint32 rank_size = words[0];
+    cmph_uint32 * rank_blob = words + 1;
+    cmph_uint8 * phf_blob = ((cmph_uint8 *) rank_blob) + rank_size + sizeof(cmph_uint32);
+
+    return _chd_search(phf_blob, rank_blob, key, keylen);
+}
+
+
diff --git a/girepository/cmph/chd.h b/girepository/cmph/chd.h
new file mode 100644
index 00000000..e829df81
--- /dev/null
+++ b/girepository/cmph/chd.h
@@ -0,0 +1,59 @@
+#ifndef _CMPH_CHD_H__
+#define _CMPH_CHD_H__
+
+#include "cmph.h"
+
+typedef struct __chd_data_t chd_data_t;
+typedef struct __chd_config_data_t chd_config_data_t;
+
+/* Config API */
+chd_config_data_t *chd_config_new(cmph_config_t * mph);
+void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+
+/** \fn void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
+ * \brief Allows to set the number of keys per bin.
+ * \param mph pointer to the configuration structure
+ * \param keys_per_bin value for the number of keys per bin
+ */
+void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
+
+/** \fn void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
+ * \brief Allows to set the number of keys per bucket.
+ * \param mph pointer to the configuration structure
+ * \param keys_per_bucket value for the number of keys per bucket
+ */
+void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
+void chd_config_destroy(cmph_config_t *mph);
+
+
+/* Chd algorithm API */
+cmph_t *chd_new(cmph_config_t *mph, double c);
+void chd_load(FILE *fd, cmph_t *mphf);
+int chd_dump(cmph_t *mphf, FILE *fd);
+void chd_destroy(cmph_t *mphf);
+cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void chd_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void chd_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 chd_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 chd_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/chd_ph.c b/girepository/cmph/chd_ph.c
new file mode 100644
index 00000000..b34415b9
--- /dev/null
+++ b/girepository/cmph/chd_ph.c
@@ -0,0 +1,988 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<math.h>
+#include<time.h>
+#include<assert.h>
+#include<limits.h>
+
+#include "cmph_structs.h"
+#include "chd_structs_ph.h"
+#include "chd_ph.h"
+#include"miller_rabin.h"
+#include"bitbool.h"
+
+
+//#define DEBUG
+#include "debug.h"
+
+// NO_ELEMENT is equivalent to null pointer
+#ifndef NO_ELEMENT
+#define NO_ELEMENT UINT_MAX
+#endif
+
+// struct used to represent items at mapping, ordering and searching phases
+// Each item holds the two per-key values from which a bin position is derived:
+// position = (f + h*probe0_num + probe1_num) % n.
+struct _chd_ph_item_t
+{
+ cmph_uint32 f; // set in the mapping step to hl[1] % n
+ cmph_uint32 h; // set in the mapping step to hl[2] % (n - 1) + 1, so it is never zero
+};
+typedef struct _chd_ph_item_t chd_ph_item_t;
+
+// struct to represent the items at mapping phase only.
+// Same f/h pair as chd_ph_item_t plus the bucket the key hashed to; it is kept
+// in a temporary array until the items are copied into their buckets.
+struct _chd_ph_map_item_t
+{
+ cmph_uint32 f; // hl[1] % n
+ cmph_uint32 h; // hl[2] % (n - 1) + 1
+ cmph_uint32 bucket_num; // hl[0] % nbuckets
+};
+typedef struct _chd_ph_map_item_t chd_ph_map_item_t;
+
+// struct to represent a bucket
+// items_list is the index of the bucket's first item in the items array.
+// size and bucket_id share storage: the mapping step uses the field as the
+// number of items in the bucket, while the ordering step stores the bucket's
+// original index in it for the size-sorted bucket array.
+struct _chd_ph_bucket_t
+{
+ cmph_uint32 items_list; // offset
+ union
+ {
+ cmph_uint32 size;
+ cmph_uint32 bucket_id;
+ };
+};
+
+typedef struct _chd_ph_bucket_t chd_ph_bucket_t;
+
+// One entry per bucket size in the array built by chd_ph_ordering():
+// entry s describes the run of buckets that hold exactly s items.
+struct _chd_ph_sorted_list_t
+{
+ cmph_uint32 buckets_list; // index of the first bucket of this size in the sorted bucket array
+ cmph_uint32 size; // number of buckets of this size
+};
+
+typedef struct _chd_ph_sorted_list_t chd_ph_sorted_list_t;
+
+
+static inline chd_ph_bucket_t * chd_ph_bucket_new(cmph_uint32 nbuckets);
+static inline void chd_ph_bucket_clean(chd_ph_bucket_t * buckets, cmph_uint32 nbuckets);
+static inline void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets);
+
+/* Allocates a zero-initialized array of nbuckets buckets; the result of
+ * calloc is returned as is, so it is NULL when the allocation fails. */
+chd_ph_bucket_t * chd_ph_bucket_new(cmph_uint32 nbuckets)
+{
+    return (chd_ph_bucket_t *) calloc(nbuckets, sizeof(chd_ph_bucket_t));
+}
+
+/* Resets the item count of every bucket to zero; items_list is left alone. */
+void chd_ph_bucket_clean(chd_ph_bucket_t * buckets, cmph_uint32 nbuckets)
+{
+    chd_ph_bucket_t *cur;
+    chd_ph_bucket_t *end;
+
+    assert(buckets);
+    end = buckets + nbuckets;
+    for(cur = buckets; cur != end; cur++)
+    {
+        cur->size = 0;
+    }
+}
+/* Appends map_items[item_idx] to the bucket it was mapped to.
+ * Returns 0 without inserting when the bucket already contains an item with
+ * the same (f, h) pair, 1 after a successful append. The caller must have
+ * reserved room for the item behind the bucket's current items.
+ * nbuckets is accepted but not used. */
+cmph_uint8 chd_ph_bucket_insert(chd_ph_bucket_t * buckets,chd_ph_map_item_t * map_items, chd_ph_item_t * items,
+                                cmph_uint32 nbuckets,cmph_uint32 item_idx)
+{
+    const chd_ph_map_item_t *src = &map_items[item_idx];
+    chd_ph_bucket_t *bucket = &buckets[src->bucket_num];
+    chd_ph_item_t *slot = &items[bucket->items_list];
+    chd_ph_item_t *const end = slot + bucket->size;
+
+    // scan the items already stored in the bucket for a duplicate
+    for(; slot != end; slot++)
+    {
+        if(slot->f == src->f && slot->h == src->h)
+        {
+            DEBUGP("Item not added\n");
+            return 0;
+        }
+    }
+
+    // slot now points just past the last stored item
+    slot->f = src->f;
+    slot->h = src->h;
+    bucket->size++;
+    return 1;
+}
+/* Frees a bucket array obtained from chd_ph_bucket_new(). */
+void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets)
+{
+    free(buckets);
+}
+
+static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
+ cmph_uint32 *max_bucket_size);
+
+static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items,
+ cmph_uint32 nbuckets,cmph_uint32 nitems, cmph_uint32 max_bucket_size);
+
+static cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
+ cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table);
+
+/* Evaluates (1 + (r/n - 1 + 1/(2n)) * ln(1 - n/r)) / ln(2) for the given
+ * _n and _r; chd_ph_new() calls it with the number of keys as _n and the
+ * number of bins as _r and reports the result as bits per key. */
+static inline double chd_ph_space_lower_bound(cmph_uint32 _n, cmph_uint32 _r)
+{
+    const double n = _n;
+    const double r = _r;
+    const double coeff = r/n - 1.0 + 1.0/(2.0*n);
+
+    return (1 + coeff*log(1 - n/r))/log(2);
+}
+
+/* Computes the entropy (in bits, summed over all n entries) of the values in
+ * disp_table: builds a histogram of the values, then accumulates
+ * -count * log2(count / n) over the non-empty histogram cells.
+ * Every value in disp_table must be smaller than max_probes.
+ * NOTE(review): the histogram allocation is not checked for failure. */
+static inline double chd_ph_get_entropy(cmph_uint32 * disp_table, cmph_uint32 n, cmph_uint32 max_probes)
+{
+    cmph_uint32 *histogram = (cmph_uint32 *) calloc(max_probes, sizeof(cmph_uint32));
+    double entropy = 0;
+    cmph_uint32 k;
+
+    for(k = 0; k < n; k++)
+    {
+        histogram[disp_table[k]]++;
+    }
+
+    for(k = 0; k < max_probes; k++)
+    {
+        if(histogram[k] == 0)
+        {
+            continue;
+        }
+        entropy -= histogram[k]*log((double)histogram[k]/(double)n)/log(2);
+    }
+
+    free(histogram);
+    return entropy;
+}
+
+/* Allocates the chd_ph configuration record and fills in the defaults:
+ * Jenkins hash, heuristic placement enabled, 1 key per bin, 4 keys per
+ * bucket. Allocation failure is caught by assert only. */
+chd_ph_config_data_t *chd_ph_config_new()
+{
+    chd_ph_config_data_t *cfg = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t));
+
+    assert(cfg);
+    memset(cfg, 0, sizeof(chd_ph_config_data_t));
+
+    // tunables
+    cfg->hashfunc = CMPH_HASH_JENKINS;
+    cfg->use_h = 1;
+    cfg->keys_per_bin = 1;
+    cfg->keys_per_bucket = 4;
+
+    // sizes, computed later by chd_ph_new()
+    cfg->m = 0;
+    cfg->n = 0;
+    cfg->nbuckets = 0;
+
+    // nothing is owned yet
+    cfg->cs = NULL;
+    cfg->hl = NULL;
+    cfg->occup_table = 0;
+
+    return cfg;
+}
+
+/* Frees the occupancy table (if any) and the chd_ph configuration record
+ * hanging off mph->data. The cs and hl members are not touched here. */
+void chd_ph_config_destroy(cmph_config_t *mph)
+{
+    chd_ph_config_data_t *cfg = (chd_ph_config_data_t *) mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(cfg->occup_table); // free(NULL) is a no-op, so no guard is needed
+    free(cfg);
+}
+
+
+/* Selects the hash function: chd_ph only uses one linear hash function, so
+ * only the first entry of the CMPH_HASH_COUNT-terminated list is taken.
+ * An empty list leaves the current setting unchanged. */
+void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
+
+    if(hashfuncs[0] != CMPH_HASH_COUNT)
+    {
+        chd_ph->hashfunc = hashfuncs[0];
+    }
+}
+
+
+/* Sets the average number of keys per bucket. Values outside 1..14 are
+ * replaced by the default of 4. */
+void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
+{
+    chd_ph_config_data_t *chd_ph;
+
+    assert(mph);
+    chd_ph = (chd_ph_config_data_t *)mph->data;
+    chd_ph->keys_per_bucket = (keys_per_bucket >= 1 && keys_per_bucket < 15) ? keys_per_bucket : 4;
+}
+
+
+/* Sets the maximum number of keys per bin. Values outside 2..127 fall back
+ * to 1 key per bin. */
+void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
+{
+    chd_ph_config_data_t *chd_ph;
+
+    assert(mph);
+    chd_ph = (chd_ph_config_data_t *)mph->data;
+    chd_ph->keys_per_bin = (keys_per_bin > 1 && keys_per_bin < 128) ? keys_per_bin : 1;
+}
+
+/*
+ * Mapping step: hashes every key into a bucket and an (f, h) pair and stores
+ * the pairs in items, grouped by bucket.
+ *
+ * Each attempt creates a fresh hash state, reads all m keys from the key
+ * source and computes bucket = hl[0] % nbuckets, f = hl[1] % n and
+ * h = hl[2] % (n - 1) + 1. Bucket sizes are counted first, turned into
+ * offsets into items, and the items are then inserted one by one. An attempt
+ * fails when two keys of the same bucket produce the same (f, h) pair; up to
+ * 1000 attempts are made.
+ *
+ * mph             configuration; mph->data must be the chd_ph config record
+ * buckets         array of chd_ph->nbuckets buckets (overwritten)
+ * items           array with room for chd_ph->m items (overwritten)
+ * max_bucket_size out: largest bucket size seen over all attempts
+ *
+ * Returns 1 on success, leaving the hash state that succeeded in chd_ph->hl.
+ * Returns 0 when the temporary array cannot be allocated, in which case
+ * chd_ph->hl is left untouched, or when every attempt failed, in which case
+ * chd_ph->hl is destroyed and set to NULL.
+ */
+cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, cmph_uint32 *max_bucket_size)
+{
+    register cmph_uint32 i = 0, g = 0;
+    cmph_uint32 hl[3];
+    chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
+    char * key = NULL;
+    cmph_uint32 keylen = 0;
+    chd_ph_map_item_t * map_item;
+    chd_ph_map_item_t * map_items;
+    register cmph_uint32 mapping_iterations = 1000;
+
+    *max_bucket_size = 0;
+
+    // calloc checks the m * sizeof product for overflow; a NULL result is
+    // only an error when there is at least one key to map
+    map_items = calloc(chd_ph->m, sizeof(chd_ph_map_item_t));
+    if(map_items == NULL && chd_ph->m != 0)
+    {
+        return 0; // FAILURE
+    }
+
+    while(1)
+    {
+        mapping_iterations--;
+        if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
+        chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);
+
+        chd_ph_bucket_clean(buckets, chd_ph->nbuckets);
+
+        mph->key_source->rewind(mph->key_source->data);
+
+        // pass 1: hash every key and count how many land in each bucket
+        for(i = 0; i < chd_ph->m; i++)
+        {
+            mph->key_source->read(mph->key_source->data, &key, &keylen);
+            hash_vector(chd_ph->hl, key, keylen, hl);
+
+            map_item = (map_items + i);
+
+            g = hl[0] % chd_ph->nbuckets;
+            map_item->f = hl[1] % chd_ph->n;
+            map_item->h = hl[2] % (chd_ph->n - 1) + 1;
+            map_item->bucket_num=g;
+            mph->key_source->dispose(mph->key_source->data, key, keylen);
+
+            buckets[g].size++;
+            if(buckets[g].size > *max_bucket_size)
+            {
+                *max_bucket_size = buckets[g].size;
+            }
+        }
+
+        // turn the counts into starting offsets and reset the counts, which
+        // chd_ph_bucket_insert() rebuilds while filling the buckets
+        buckets[0].items_list = 0;
+        for(i = 1; i < chd_ph->nbuckets; i++)
+        {
+            buckets[i].items_list = buckets[i-1].items_list + buckets[i - 1].size;
+            buckets[i - 1].size = 0;
+        }
+        buckets[i - 1].size = 0;
+
+        // pass 2: copy the items into their buckets, stopping at a duplicate
+        for(i = 0; i < chd_ph->m; i++)
+        {
+            if(!chd_ph_bucket_insert(buckets, map_items, items, chd_ph->nbuckets, i))
+                break;
+        }
+        if(i == chd_ph->m)
+        {
+            free(map_items);
+            return 1; // SUCCESS
+        }
+
+        if(mapping_iterations == 0)
+        {
+            goto error;
+        }
+    }
+error:
+    free(map_items);
+    hash_state_destroy(chd_ph->hl);
+    chd_ph->hl = NULL;
+    return 0; // FAILURE
+}
+
+/*
+ * Ordering step: counting sort of the buckets by size.
+ *
+ * Builds and returns an array indexed by bucket size (0..max_bucket_size)
+ * whose entry s gives the start index and count of the buckets holding s
+ * items. *_buckets is replaced by a new array in which non-empty buckets are
+ * grouped by increasing size and carry their original index in bucket_id;
+ * *_items is replaced by a new array laid out in that same order. The old
+ * bucket and item arrays are freed. The caller owns the returned array.
+ *
+ * NOTE(review): none of the three calloc results is checked.
+ * NOTE(review): with max_bucket_size == 0 the returned array has a single
+ * entry, yet sorted_lists[1] is written below -- confirm callers never pass 0.
+ */
+chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_t ** _items,
+ cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size)
+{
+ chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(max_bucket_size + 1, sizeof(chd_ph_sorted_list_t));
+
+ chd_ph_bucket_t * input_buckets = (*_buckets);
+ chd_ph_bucket_t * output_buckets;
+ chd_ph_item_t * input_items = (*_items);
+ chd_ph_item_t * output_items;
+ register cmph_uint32 i, j, bucket_size, position, position2;
+// cmph_uint32 non_empty_buckets;
+ DEBUGP("MAX BUCKET SIZE = %u\n", max_bucket_size);
+ // Determine size of each list of buckets
+ for(i = 0; i < nbuckets; i++)
+ {
+ bucket_size = input_buckets[i].size;
+ if(bucket_size == 0)
+ continue;
+ sorted_lists[bucket_size].size++;
+ };
+ sorted_lists[1].buckets_list = 0;
+ // Determine final position of list of buckets into the contiguous array that will store all the buckets
+ // (prefix sum over the counts; each count is zeroed so it can serve as a fill cursor below)
+ for(i = 2; i <= max_bucket_size; i++)
+ {
+ sorted_lists[i].buckets_list = sorted_lists[i-1].buckets_list + sorted_lists[i-1].size;
+ sorted_lists[i-1].size = 0;
+ };
+ sorted_lists[i-1].size = 0;
+ // Store the buckets in a new array which is sorted by bucket sizes
+ output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero
+// non_empty_buckets = nbuckets;
+
+ for(i = 0; i < nbuckets; i++)
+ {
+ bucket_size = input_buckets[i].size;
+ if(bucket_size == 0)
+ {
+// non_empty_buckets--;
+ continue;
+ };
+ // next free slot inside the run reserved for this bucket size
+ position = sorted_lists[bucket_size].buckets_list + sorted_lists[bucket_size].size;
+ // bucket_id shares storage with size: from here on the size is implied by the run the bucket sits in
+ output_buckets[position].bucket_id = i;
+ output_buckets[position].items_list = input_buckets[i].items_list;
+ sorted_lists[bucket_size].size++;
+ };
+/* for(i = non_empty_buckets; i < nbuckets; i++)
+ output_buckets[i].size=0;*/
+ // Return the buckets sorted in new order and free the old buckets sorted in old order
+ free(input_buckets);
+ (*_buckets) = output_buckets;
+
+
+ // Store the items according to the new order of buckets.
+ output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t));
+ position = 0;
+ i = 0;
+ for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
+ {
+ for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size + sorted_lists[bucket_size].buckets_list; i++)
+ {
+ // position2 walks the bucket's items in the old array, position in the new one
+ position2 = output_buckets[i].items_list;
+ output_buckets[i].items_list = position;
+ for(j = 0; j < bucket_size; j++)
+ {
+ output_items[position].f = input_items[position2].f;
+ output_items[position].h = input_items[position2].h;
+ position++;
+ position2++;
+ };
+ };
+ };
+ //Return the items sorted in new order and free the old items sorted in old order
+ free(input_items);
+ (*_items) = output_items;
+ return sorted_lists;
+};
+
+/*
+ * Tries to place all `size` items of bucket bucket_num using the displacement
+ * pair (probe0_num, probe1_num). Each item goes to bin
+ *   (f + h*probe0_num + probe1_num) % n
+ * which must still have room: fewer than keys_per_bin keys when bins hold
+ * several keys (occup_table is then one byte counter per bin), or an unset
+ * bit when bins hold one key (occup_table is then a bitmap).
+ *
+ * Returns 1 when every item was placed. Otherwise the items placed so far
+ * are removed from occup_table again and 0 is returned.
+ */
+static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets,
+                                            chd_ph_item_t *items, cmph_uint32 probe0_num, cmph_uint32 probe1_num,
+                                            cmph_uint32 bucket_num, cmph_uint32 size)
+{
+    chd_ph_item_t *const first = items + buckets[bucket_num].items_list;
+    const int counting = (chd_ph->keys_per_bin > 1);
+    cmph_uint32 placed;
+    cmph_uint32 k;
+    cmph_uint32 position;
+
+    // placement
+    for(placed = 0; placed < size; placed++)
+    {
+        position = (cmph_uint32)((first[placed].f + ((cmph_uint64)first[placed].h)*probe0_num + probe1_num) % chd_ph->n);
+        if(counting)
+        {
+            if(chd_ph->occup_table[position] >= chd_ph->keys_per_bin)
+            {
+                break;
+            }
+            (chd_ph->occup_table[position])++;
+        }
+        else
+        {
+            if(GETBIT32(((cmph_uint32 *)chd_ph->occup_table), position))
+            {
+                break;
+            }
+            SETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
+        }
+    }
+
+    if(placed == size)
+    {
+        return 1;
+    }
+
+    // undo the placement of the first `placed` items, in the order they were placed
+    for(k = 0; k < placed; k++)
+    {
+        position = (cmph_uint32)((first[k].f + ((cmph_uint64)first[k].h)*probe0_num + probe1_num) % chd_ph->n);
+        if(counting)
+        {
+            (chd_ph->occup_table[position])--;
+        }
+        else
+        {
+            UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
+        }
+    }
+    return 0;
+}
+
+/*
+ * Searches a displacement for a single bucket by trying (probe0, probe1)
+ * pairs in order: probe0 counts from 0 to n - 1, then wraps and bumps probe1.
+ * On success the displacement probe0 + probe1 * n is recorded in disp_table
+ * under the bucket's original id and 1 is returned. Returns 0 after
+ * max_probes attempts or once probe1 reaches n.
+ */
+static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
+                                      cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size)
+{
+    cmph_uint32 probe0 = 0;
+    cmph_uint32 probe1 = 0;
+    cmph_uint32 attempts = 0;
+
+    do
+    {
+        if(place_bucket_probe(chd_ph, buckets, items, probe0, probe1, bucket_num, size))
+        {
+            disp_table[buckets[bucket_num].bucket_id] = probe0 + probe1 * chd_ph->n;
+            return 1;
+        }
+
+        // advance to the next displacement pair
+        probe0++;
+        if(probe0 >= chd_ph->n)
+        {
+            probe0 -= chd_ph->n;
+            probe1++;
+        }
+        attempts++;
+    } while(attempts < max_probes && probe1 < chd_ph->n);
+
+    return 0;
+}
+
+/*
+ * Places the buckets one at a time, from the largest bucket size down to
+ * size 1, giving each bucket its own displacement search via place_bucket().
+ * Returns 0 as soon as one bucket cannot be placed, 1 otherwise.
+ */
+static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items,
+                                        cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
+                                        cmph_uint32 * disp_table)
+{
+    cmph_uint32 bucket_size;
+
+    for(bucket_size = max_bucket_size; bucket_size > 0; bucket_size--)
+    {
+        const cmph_uint32 first = sorted_lists[bucket_size].buckets_list;
+        const cmph_uint32 end = sorted_lists[bucket_size].size + first;
+        cmph_uint32 bucket;
+
+        for(bucket = first; bucket < end; bucket++)
+        {
+            if(!place_bucket(chd_ph, buckets, items, max_probes, disp_table, bucket, bucket_size))
+            {
+                return 0;
+            }
+        }
+    }
+    return 1;
+}
+
+/*
+ * Heuristic placement: for each bucket size, from the largest down to 1, one
+ * displacement pair (probe0_num, probe1_num) at a time is tried on ALL
+ * buckets of that size that are still unplaced, before moving on to the next
+ * pair. Buckets that could not be placed are compacted to the front of the
+ * size's run in `buckets`, and sorted_lists[i].size is temporarily reduced to
+ * the number of buckets still unplaced.
+ *
+ * Returns 1 when every bucket was placed, 0 when a size runs out of probes
+ * (max_probes attempts, or probe1_num reaching n). In both cases
+ * sorted_lists[i].size is restored for the sizes that were processed.
+ *
+ * Outside DEBUG builds the compaction overwrites entries of `buckets` rather
+ * than swapping them, so the array no longer lists every bucket afterwards;
+ * DEBUG builds swap instead, presumably so chd_ph_check_bin_hashing() can
+ * still walk all buckets -- TODO confirm.
+ */
+static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
+ cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
+ cmph_uint32 * disp_table)
+{
+ register cmph_uint32 i,j, non_placed_bucket;
+ register cmph_uint32 curr_bucket;
+ register cmph_uint32 probe_num, probe0_num, probe1_num;
+ cmph_uint32 sorted_list_size;
+#ifdef DEBUG
+ cmph_uint32 items_list;
+ cmph_uint32 bucket_id;
+#endif
+ DEBUGP("USING HEURISTIC TO PLACE BUCKETS\n");
+ for(i = max_bucket_size; i > 0; i--)
+ {
+ probe_num = 0;
+ probe0_num = 0;
+ probe1_num = 0;
+ // remember the real count; the field is used as "buckets still unplaced" below
+ sorted_list_size = sorted_lists[i].size;
+ while(sorted_lists[i].size != 0)
+ {
+ curr_bucket = sorted_lists[i].buckets_list;
+ for(j = 0, non_placed_bucket = 0; j < sorted_lists[i].size; j++)
+ {
+ // if bucket is successfully placed remove it from list
+ if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i))
+ {
+ disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n;
+// DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]);
+ }
+ else
+ {
+// DEBUGP("BUCKET %u NOT PLACED\n", curr_bucket);
+#ifdef DEBUG
+ items_list = buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list;
+ bucket_id = buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id;
+#endif
+ // move the unplaced bucket into the next slot at the front of the run
+ buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list;
+ buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id;
+#ifdef DEBUG
+ buckets[curr_bucket].items_list=items_list;
+ buckets[curr_bucket].bucket_id=bucket_id;
+#endif
+ non_placed_bucket++;
+ }
+ curr_bucket++;
+ };
+ sorted_lists[i].size = non_placed_bucket;
+ // advance to the next displacement pair: probe0_num wraps at n and carries into probe1_num
+ probe0_num++;
+ if(probe0_num >= chd_ph->n)
+ {
+ probe0_num -= chd_ph->n;
+ probe1_num++;
+ };
+ probe_num++;
+ if(probe_num >= max_probes || probe1_num >= chd_ph->n)
+ {
+ sorted_lists[i].size = sorted_list_size;
+ return 0;
+ };
+ };
+ sorted_lists[i].size = sorted_list_size;
+ };
+ return 1;
+};
+
+/*
+ * Searching step: finds a displacement for every bucket, using the heuristic
+ * placement (place_buckets2) when use_h is set and the bucket-by-bucket
+ * placement (place_buckets1) otherwise. Returns the result of the chosen
+ * routine: 1 when all buckets were placed, 0 otherwise.
+ */
+cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
+                            cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
+                            cmph_uint32 * disp_table)
+{
+    if(!chd_ph->use_h)
+    {
+        return place_buckets1(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
+    }
+    return place_buckets2(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
+}
+
+/*
+ * Consistency check (called from chd_ph_new() in DEBUG builds only): clears
+ * occup_table and replays the placement of every item using the displacement
+ * recorded for its bucket in disp_table. Returns 0 as soon as an item lands
+ * in a bin that is already full, 1 when all items fit.
+ */
+static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items,
+                                                  cmph_uint32 * disp_table, chd_ph_sorted_list_t * sorted_lists,cmph_uint32 max_bucket_size)
+{
+    cmph_uint32 bucket_size;
+    cmph_uint32 bucket;
+    cmph_uint32 k;
+    cmph_uint32 position;
+    cmph_uint32 m = 0;
+
+    // occup_table is a byte counter per bin or a bitmap, depending on keys_per_bin
+    if(chd_ph->keys_per_bin > 1)
+    {
+        memset(chd_ph->occup_table, 0, chd_ph->n);
+    }
+    else
+    {
+        memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
+    }
+
+    for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
+    {
+        const cmph_uint32 first = sorted_lists[bucket_size].buckets_list;
+        const cmph_uint32 end = sorted_lists[bucket_size].size + first;
+
+        for(bucket = first; bucket < end; bucket++)
+        {
+            chd_ph_item_t *item = items + buckets[bucket].items_list;
+            const cmph_uint32 disp = disp_table[buckets[bucket].bucket_id];
+            const cmph_uint32 probe0_num = disp % chd_ph->n;
+            const cmph_uint32 probe1_num = disp / chd_ph->n;
+
+            for(k = bucket_size; k > 0; k--, item++)
+            {
+                m++;
+                position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
+                if(chd_ph->keys_per_bin > 1)
+                {
+                    if(chd_ph->occup_table[position] >= chd_ph->keys_per_bin)
+                    {
+                        return 0;
+                    }
+                    (chd_ph->occup_table[position])++;
+                }
+                else
+                {
+                    if(GETBIT32(((cmph_uint32*)chd_ph->occup_table), position))
+                    {
+                        return 0;
+                    }
+                    SETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
+                }
+            }
+        }
+    }
+    DEBUGP("We were able to place m = %u keys\n", m);
+    return 1;
+}
+
+
+/*
+ * Builds a chd_ph perfect hash function for the keys in mph->key_source.
+ *
+ * mph  configuration; mph->data must be the chd_ph config record
+ * c    load factor, clamped to the range [0.5, 0.99]
+ *
+ * Sizes the structure (m keys, nbuckets buckets, n bins with n prime), then
+ * repeats the mapping / ordering / searching steps up to 100 times until
+ * every bucket has a displacement, and finally compresses the displacement
+ * table. Returns a newly allocated cmph_t that takes over the compressed
+ * sequence and the hash state, or NULL on failure.
+ *
+ * NOTE(review): none of the allocations made here is checked for failure.
+ * NOTE(review): chd_ph->occup_table is overwritten without freeing a previous
+ * table, which looks like a leak if this runs twice on one config -- confirm.
+ * NOTE(review): with zero keys, log(chd_ph->m) is log(0) -- confirm callers
+ * never pass an empty key source.
+ */
+cmph_t *chd_ph_new(cmph_config_t *mph, double c)
+{
+ cmph_t *mphf = NULL;
+ chd_ph_data_t *chd_phf = NULL;
+ chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
+
+ register double load_factor = c;
+ register cmph_uint8 searching_success = 0;
+ register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
+ register cmph_uint32 iterations = 100;
+ chd_ph_bucket_t * buckets = NULL;
+ chd_ph_item_t * items = NULL;
+ register cmph_uint8 failure = 0;
+ cmph_uint32 max_bucket_size = 0;
+ chd_ph_sorted_list_t * sorted_lists = NULL;
+ cmph_uint32 * disp_table = NULL;
+ register double space_lower_bound = 0;
+ #ifdef CMPH_TIMING
+ double construction_time_begin = 0.0;
+ double construction_time = 0.0;
+ ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
+ #endif
+
+
+ chd_ph->m = mph->key_source->nkeys;
+ DEBUGP("m = %u\n", chd_ph->m);
+
+ chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
+ DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);
+
+ if(load_factor < 0.5 )
+ {
+ load_factor = 0.5;
+ }
+
+ if(load_factor >= 0.99)
+ {
+ load_factor = 0.99;
+ }
+
+ DEBUGP("load_factor = %.3f\n", load_factor);
+
+ chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;
+
+ //Round the number of bins to the prime immediately above
+ if(chd_ph->n % 2 == 0) chd_ph->n++;
+ for(;;)
+ {
+ if(check_primality(chd_ph->n) == 1)
+ break;
+ chd_ph->n += 2; // just odd numbers can be primes for n > 2
+
+ };
+
+ DEBUGP("n = %u \n", chd_ph->n);
+ // the lower bound is only computed for one key per bin; otherwise it stays 0
+ if(chd_ph->keys_per_bin == 1)
+ {
+ space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
+ }
+
+ if(mph->verbosity)
+ {
+ fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
+ }
+
+ // We allocate the working tables
+ buckets = chd_ph_bucket_new(chd_ph->nbuckets);
+ items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
+
+ // scale the probe budget by log2(m)/20
+ max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
+
+ // one bit per bin when bins hold a single key, one byte counter per bin otherwise
+ if(chd_ph->keys_per_bin == 1)
+ chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
+ else
+ chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
+
+ disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
+//
+// init_genrand(time(0));
+
+ // each pass remaps the keys with a fresh hash state and retries the placement
+ while(1)
+ {
+ iterations --;
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
+ }
+
+ if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
+ {
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Failure in mapping step\n");
+ }
+ failure = 1;
+ goto cleanup;
+ }
+
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Starting ordering step\n");
+ }
+ if(sorted_lists)
+ {
+ free(sorted_lists);
+ }
+
+ // chd_ph_ordering frees the arrays passed in and replaces buckets and items with new ones
+ sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);
+
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Starting searching step\n");
+ }
+
+ searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
+ if(searching_success) break;
+
+ // reset occup_table
+ if(chd_ph->keys_per_bin > 1)
+ memset(chd_ph->occup_table, 0, chd_ph->n);
+ else
+ memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
+ if(iterations == 0)
+ {
+ // Cleanup memory
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Failure because the max trials was exceeded\n");
+ }
+ failure = 1;
+ goto cleanup;
+ };
+ }
+
+ #ifdef DEBUG
+ {
+ if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
+ {
+
+ DEBUGP("Error for bin packing generation");
+ failure = 1;
+ goto cleanup;
+ }
+ }
+ #endif
+
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Starting compressing step\n");
+ }
+
+ // NOTE(review): a previous cs is released with free() only, without compressed_seq_destroy() -- confirm
+ if(chd_ph->cs)
+ {
+ free(chd_ph->cs);
+ }
+ chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
+ compressed_seq_init(chd_ph->cs);
+ compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
+
+ #ifdef CMPH_TIMING
+ ELAPSED_TIME_IN_SECONDS(&construction_time);
+ register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
+ DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
+ #endif
+
+// the working tables are released on both the success and the failure path
+cleanup:
+ chd_ph_bucket_destroy(buckets);
+ free(items);
+ free(sorted_lists);
+ free(disp_table);
+ if(failure)
+ {
+ if(chd_ph->hl)
+ {
+ hash_state_destroy(chd_ph->hl);
+ }
+ chd_ph->hl = NULL;
+ return NULL;
+ }
+
+ mphf = (cmph_t *)malloc(sizeof(cmph_t));
+ mphf->algo = mph->algo;
+ chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
+
+ chd_phf->cs = chd_ph->cs;
+ chd_ph->cs = NULL; //transfer memory ownership
+ chd_phf->hl = chd_ph->hl;
+ chd_ph->hl = NULL; //transfer memory ownership
+ chd_phf->n = chd_ph->n;
+ chd_phf->nbuckets = chd_ph->nbuckets;
+
+ mphf->data = chd_phf;
+ mphf->size = chd_ph->n;
+
+ DEBUGP("Successfully generated minimal perfect hash\n");
+ if (mph->verbosity)
+ {
+ fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+ }
+
+ #ifdef CMPH_TIMING
+ register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
+ construction_time = construction_time - construction_time_begin;
+ fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
+ #endif
+
+ return mphf;
+}
+
+
+
+/*
+ * Reads a chd_ph function from fd into a freshly allocated record stored in
+ * mphf->data. The stream is expected to hold, in this order: the hash state
+ * (cmph_uint32 length + bytes), the compressed sequence (cmph_uint32 length
+ * + bytes), then n and nbuckets as cmph_uint32 -- the order chd_ph_dump()
+ * writes them after its __cmph_dump() call.
+ *
+ * NOTE(review): the fread and malloc results are not checked and the
+ * function has no way to report a truncated or corrupt stream.
+ */
+void chd_ph_load(FILE *fd, cmph_t *mphf)
+{
+    chd_ph_data_t *loaded = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
+    char *blob = NULL;
+    cmph_uint32 blob_len;
+    register size_t nbytes;
+
+    DEBUGP("Loading chd_ph mphf\n");
+    mphf->data = loaded;
+
+    // section 1: hash state
+    nbytes = fread(&blob_len, sizeof(cmph_uint32), (size_t)1, fd);
+    DEBUGP("Hash state has %u bytes\n", blob_len);
+    blob = (char *)malloc((size_t)blob_len);
+    nbytes = fread(blob, (size_t)blob_len, (size_t)1, fd);
+    loaded->hl = hash_state_load(blob, blob_len);
+    free(blob);
+
+    // section 2: compressed sequence of displacements
+    loaded->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
+    nbytes = fread(&blob_len, sizeof(cmph_uint32), (size_t)1, fd);
+    DEBUGP("Compressed sequence structure has %u bytes\n", blob_len);
+    blob = (char *)malloc((size_t)blob_len);
+    nbytes = fread(blob, (size_t)blob_len, (size_t)1, fd);
+    compressed_seq_load(loaded->cs, blob, blob_len);
+    free(blob);
+
+    // section 3: n and nbuckets
+    DEBUGP("Reading n and nbuckets\n");
+    nbytes = fread(&(loaded->n), sizeof(cmph_uint32), (size_t)1, fd);
+    nbytes = fread(&(loaded->nbuckets), sizeof(cmph_uint32), (size_t)1, fd);
+}
+
+/* Writes a length-prefixed blob: buflen as a cmph_uint32 followed by buflen
+ * bytes. Returns 1 on success and 0 when either write comes up short. */
+static int chd_ph_dump_blob(FILE *fd, const char *buf, cmph_uint32 buflen)
+{
+    if(fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd) != 1)
+    {
+        return 0;
+    }
+    // an empty blob has no payload; fwrite would report 0 items for it
+    if(buflen > 0 && fwrite(buf, (size_t)buflen, (size_t)1, fd) != 1)
+    {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Writes the function to fd: first whatever __cmph_dump() emits, then the
+ * hash state and the compressed sequence as length-prefixed blobs, then n
+ * and nbuckets as cmph_uint32 -- the order chd_ph_load() reads them back.
+ *
+ * Returns 1 on success and 0 as soon as one of the writes performed here
+ * fails (previously write errors were ignored and 1 was always returned).
+ * The temporary dump buffers are freed on every path.
+ */
+int chd_ph_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    int ok;
+    chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
+
+    __cmph_dump(mphf, fd);
+
+    hash_state_dump(data->hl, &buf, &buflen);
+    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+    ok = chd_ph_dump_blob(fd, buf, buflen);
+    free(buf);
+    if(!ok)
+    {
+        return 0;
+    }
+
+    compressed_seq_dump(data->cs, &buf, &buflen);
+    DEBUGP("Dumping compressed sequence structure with %u bytes to disk\n", buflen);
+    ok = chd_ph_dump_blob(fd, buf, buflen);
+    free(buf);
+    if(!ok)
+    {
+        return 0;
+    }
+
+    // dumping n and nbuckets
+    if(fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd) != 1)
+    {
+        return 0;
+    }
+    if(fwrite(&(data->nbuckets), sizeof(cmph_uint32), (size_t)1, fd) != 1)
+    {
+        return 0;
+    }
+    return 1;
+}
+
+/* Destroys a chd_ph function: tears down and frees the compressed sequence,
+ * destroys the hash state, then frees the data record and the cmph_t. */
+void chd_ph_destroy(cmph_t *mphf)
+{
+    chd_ph_data_t *phf = (chd_ph_data_t *)mphf->data;
+
+    compressed_seq_destroy(phf->cs);
+    free(phf->cs);
+
+    hash_state_destroy(phf->hl);
+
+    free(phf);
+    free(mphf);
+}
+
+/*
+ * Evaluates the function for key. The three hash values select the bucket
+ * (hl[0] % nbuckets) and the pair f = hl[1] % n, h = hl[2] % (n - 1) + 1;
+ * the bucket's displacement is fetched from the compressed sequence and
+ * split into probe0 = disp % n and probe1 = disp / n. The result is
+ * (f + h*probe0 + probe1) % n, computed in 64 bits.
+ */
+cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    chd_ph_data_t *phf = mphf->data;
+    cmph_uint32 hv[3];
+    cmph_uint32 bucket, f, h, disp;
+
+    hash_vector(phf->hl, key, keylen, hv);
+    bucket = hv[0] % phf->nbuckets;
+    f = hv[1] % phf->n;
+    h = hv[2] % (phf->n-1) + 1;
+
+    disp = compressed_seq_query(phf->cs, bucket);
+    return (cmph_uint32)((f + ((cmph_uint64 )h)*(disp % phf->n) + disp/phf->n) % phf->n);
+}
+
+/*
+ * Serializes the function into packed_mphf using the layout
+ *   [hash type : cmph_uint32][packed hash state][n : cmph_uint32]
+ *   [nbuckets : cmph_uint32][packed compressed sequence]
+ * which is the layout chd_ph_search_packed() reads.
+ */
+void chd_ph_pack(cmph_t *mphf, void *packed_mphf)
+{
+    chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
+    const CMPH_HASH hl_type = hash_get_type(data->hl);
+    cmph_uint8 *out = packed_mphf;
+    cmph_uint32 *word;
+
+    // hash function type
+    word = (cmph_uint32 *) out;
+    *word = hl_type;
+    out += sizeof(cmph_uint32);
+
+    // hash state
+    hash_state_pack(data->hl, out);
+    out += hash_state_packed_size(hl_type);
+
+    // n followed by nbuckets
+    word = (cmph_uint32 *) out;
+    word[0] = data->n;
+    word[1] = data->nbuckets;
+    out += sizeof(data->n) + sizeof(data->nbuckets);
+
+    // compressed sequence of displacements
+    compressed_seq_pack(data->cs, out);
+}
+
+/* Returns sizeof(CMPH_ALGO) plus the bytes chd_ph_pack() writes: the packed
+ * hash state, the packed compressed sequence and three cmph_uint32 fields
+ * (hash type, n, nbuckets). */
+cmph_uint32 chd_ph_packed_size(cmph_t *mphf)
+{
+    chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
+    const cmph_uint32 hl_bytes = hash_state_packed_size(hash_get_type(data->hl));
+    const cmph_uint32 cs_bytes = compressed_seq_packed_size(data->cs);
+
+    return (cmph_uint32)(sizeof(CMPH_ALGO) + hl_bytes + cs_bytes + 3*sizeof(cmph_uint32));
+}
+
+/*
+ * Evaluates the function for key directly on a buffer written by
+ * chd_ph_pack(): reads the hash type, skips the packed hash state to reach
+ * n and nbuckets, and treats the bytes after them as the packed compressed
+ * sequence. The arithmetic is the same as in chd_ph_search().
+ */
+cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    const CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
+    cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; // skip the 4-byte hash type
+    cmph_uint32 *tail = (cmph_uint32 *)(hl_ptr + hash_state_packed_size(hl_type));
+    const cmph_uint32 n = tail[0];
+    const cmph_uint32 nbuckets = tail[1];
+    cmph_uint32 *cs = tail + 2;
+    cmph_uint32 hv[3];
+    cmph_uint32 bucket, f, h, disp;
+
+    hash_vector_packed(hl_ptr, hl_type, key, keylen, hv);
+
+    bucket = hv[0] % nbuckets;
+    f = hv[1] % n;
+    h = hv[2] % (n-1) + 1;
+
+    disp = compressed_seq_query_packed(cs, bucket);
+    return (cmph_uint32)((f + ((cmph_uint64 )h)*(disp % n) + disp/n) % n);
+}
+
+
+
diff --git a/girepository/cmph/chd_ph.h b/girepository/cmph/chd_ph.h
new file mode 100644
index 00000000..d2bdb028
--- /dev/null
+++ b/girepository/cmph/chd_ph.h
@@ -0,0 +1,59 @@
+#ifndef _CMPH_CHD_PH_H__
+#define _CMPH_CHD_PH_H__
+
+#include "cmph.h"
+
+typedef struct __chd_ph_data_t chd_ph_data_t;
+typedef struct __chd_ph_config_data_t chd_ph_config_data_t;
+
+/* Config API */
+chd_ph_config_data_t *chd_ph_config_new();
+void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+
+/** \fn void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
+ * \brief Allows to set the number of keys per bin.
+ * \param mph pointer to the configuration structure
+ * \param keys_per_bin value for the number of keys per bin
+ */
+void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
+
+/** \fn void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
+ * \brief Allows to set the number of keys per bucket.
+ * \param mph pointer to the configuration structure
+ * \param keys_per_bucket value for the number of keys per bucket
+ */
+void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
+void chd_ph_config_destroy(cmph_config_t *mph);
+
+
+/* Chd algorithm API */
+cmph_t *chd_ph_new(cmph_config_t *mph, double c);
+void chd_ph_load(FILE *fd, cmph_t *mphf);
+int chd_ph_dump(cmph_t *mphf, FILE *fd);
+void chd_ph_destroy(cmph_t *mphf);
+cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void chd_ph_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void chd_ph_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 chd_ph_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 chd_ph_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/chd_structs.h b/girepository/cmph/chd_structs.h
new file mode 100644
index 00000000..d62f6826
--- /dev/null
+++ b/girepository/cmph/chd_structs.h
@@ -0,0 +1,21 @@
+#ifndef __CMPH_CHD_STRUCTS_H__
+#define __CMPH_CHD_STRUCTS_H__
+
+#include "chd_structs_ph.h"
+#include "chd_ph.h"
+#include "compressed_rank.h"
+
+struct __chd_data_t
+{
+ cmph_uint32 packed_cr_size; // presumably the byte length of packed_cr -- TODO confirm in chd.c
+ cmph_uint8 * packed_cr; // packed compressed rank structure to control the number of zeros in a bit vector
+
+ cmph_uint32 packed_chd_phf_size; // presumably the byte length of packed_chd_phf -- TODO confirm in chd.c
+ cmph_uint8 * packed_chd_phf; // NOTE(review): looks like the packed chd_ph function this algorithm builds on -- confirm
+};
+
+struct __chd_config_data_t
+{
+ cmph_config_t *chd_ph; // nested configuration object; the chd_ph algorithm must be used here
+};
+#endif
diff --git a/girepository/cmph/chd_structs_ph.h b/girepository/cmph/chd_structs_ph.h
new file mode 100644
index 00000000..d8692182
--- /dev/null
+++ b/girepository/cmph/chd_structs_ph.h
@@ -0,0 +1,29 @@
+#ifndef __CMPH_CHD_PH_STRUCTS_H__
+#define __CMPH_CHD_PH_STRUCTS_H__
+
+#include "hash_state.h"
+#include "compressed_seq.h"
+
+struct __chd_ph_data_t // the same four members (cs, nbuckets, n, hl) also appear in __chd_ph_config_data_t
+{
+ compressed_seq_t * cs; // compressed displacement values
+ cmph_uint32 nbuckets; // number of buckets
+ cmph_uint32 n; // number of bins
+ hash_state_t *hl; // linear hash function
+};
+
+struct __chd_ph_config_data_t
+{
+ CMPH_HASH hashfunc; // type of the linear hash function to be used
+ compressed_seq_t * cs; // compressed displacement values
+ cmph_uint32 nbuckets; // number of buckets
+ cmph_uint32 n; // number of bins
+ hash_state_t *hl; // linear hash function
+
+ cmph_uint32 m; // number of keys
+ cmph_uint8 use_h; // flag to indicate the use of a heuristic (use_h = 1)
+ cmph_uint32 keys_per_bin;//maximum number of keys per bin
+ cmph_uint32 keys_per_bucket; // average number of keys per bucket
+ cmph_uint8 *occup_table; // table that indicates occupied positions
+};
+#endif
diff --git a/girepository/cmph/chm.c b/girepository/cmph/chm.c
new file mode 100644
index 00000000..e03cca80
--- /dev/null
+++ b/girepository/cmph/chm.c
@@ -0,0 +1,381 @@
+#include "graph.h"
+#include "chm.h"
+#include "cmph_structs.h"
+#include "chm_structs.h"
+#include "hash.h"
+#include "bitbool.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+static int chm_gen_edges(cmph_config_t *mph);
+static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v);
+
+/* Allocate a zero-filled CHM configuration whose two hash slots default to
+ * CMPH_HASH_JENKINS. Allocation success is checked with assert(). */
+chm_config_data_t *chm_config_new()
+{
+    chm_config_data_t *cfg = (chm_config_data_t *)calloc((size_t)1, sizeof(chm_config_data_t));
+    assert(cfg);
+    cfg->hashfuncs[0] = cfg->hashfuncs[1] = CMPH_HASH_JENKINS;
+    /* Working buffers stay unset until chm_new() creates them. */
+    cfg->hashes = NULL;
+    cfg->graph = NULL;
+    cfg->g = NULL;
+    return cfg;
+}
+/* Free the CHM-specific configuration block stored in mph->data.
+ * Only the block itself is released; its member pointers are not followed. */
+void chm_config_destroy(cmph_config_t *mph)
+{
+    chm_config_data_t *cfg = (chm_config_data_t *)mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(cfg);
+}
+
+/* Copy up to two hash types from the CMPH_HASH_COUNT-terminated list
+ * `hashfuncs` into the CHM configuration; extra entries are ignored. */
+void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    chm_config_data_t *cfg = (chm_config_data_t *)mph->data;
+    cmph_uint32 slot;
+
+    /* chm only uses two hash functions */
+    for (slot = 0; hashfuncs[slot] != CMPH_HASH_COUNT && slot < 2; ++slot)
+    {
+        cfg->hashfuncs[slot] = hashfuncs[slot];
+    }
+}
+
+/**
+ * Build a CHM minimal perfect hash function for the keys in mph->key_source.
+ *
+ * \param mph configuration whose data member is a chm_config_data_t
+ * \param c   ratio of graph vertices to keys; 0 selects the default 2.09
+ * \return a newly allocated cmph_t, or NULL when no acyclic graph could be
+ *         generated in 20 attempts or when an allocation failed
+ *
+ * On success the g array and the hash states are handed over from the
+ * configuration to the returned function. On failure every intermediate
+ * buffer is released and the configuration pointers are reset to NULL, so
+ * the configuration holds no leaked or dangling working state.
+ */
+cmph_t *chm_new(cmph_config_t *mph, double c)
+{
+    cmph_t *mphf = NULL;
+    chm_data_t *chmf = NULL;
+
+    cmph_uint32 i;
+    cmph_uint32 iterations = 20;
+    cmph_uint8 *visited = NULL;
+    chm_config_data_t *chm = (chm_config_data_t *)mph->data;
+    chm->m = mph->key_source->nkeys;
+    if (c == 0) c = 2.09;
+    chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
+    DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
+    chm->graph = graph_new(chm->n, chm->m);
+    DEBUGP("Created graph\n");
+
+    chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
+    if (chm->hashes == NULL) goto fail;
+    for(i = 0; i < 3; ++i) chm->hashes[i] = NULL;
+    //Mapping step
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", chm->m, chm->n);
+    }
+    while(1)
+    {
+        int ok;
+        chm->hashes[0] = hash_state_new(chm->hashfuncs[0], chm->n);
+        chm->hashes[1] = hash_state_new(chm->hashfuncs[1], chm->n);
+        ok = chm_gen_edges(mph);
+        if (!ok)
+        {
+            --iterations;
+            hash_state_destroy(chm->hashes[0]);
+            chm->hashes[0] = NULL;
+            hash_state_destroy(chm->hashes[1]);
+            chm->hashes[1] = NULL;
+            DEBUGP("%u iterations remaining\n", iterations);
+            if (mph->verbosity)
+            {
+                fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
+            }
+            if (iterations == 0) break;
+        }
+        else break;
+    }
+    if (iterations == 0) goto fail;
+
+    //Assignment step
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Starting assignment step\n");
+    }
+    DEBUGP("Assignment step\n");
+    visited = (cmph_uint8 *)malloc((size_t)(chm->n/8 + 1));
+    if (visited == NULL) goto fail;
+    memset(visited, 0, (size_t)(chm->n/8 + 1));
+    free(chm->g);
+    chm->g = (cmph_uint32 *)malloc(chm->n * sizeof(cmph_uint32));
+    if (chm->g == NULL) goto fail;
+    for (i = 0; i < chm->n; ++i)
+    {
+        if (!GETBIT(visited,i))
+        {
+            chm->g[i] = 0;
+            chm_traverse(chm, visited, i);
+        }
+    }
+    graph_destroy(chm->graph);
+    chm->graph = NULL;
+    free(visited);
+    visited = NULL;
+
+    /* Allocate both result objects before moving ownership so that a late
+     * allocation failure can still be unwound through the fail path. */
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    chmf = (chm_data_t *)malloc(sizeof(chm_data_t));
+    if (mphf == NULL || chmf == NULL) goto fail;
+
+    mphf->algo = mph->algo;
+    chmf->g = chm->g;
+    chm->g = NULL; //transfer memory ownership
+    chmf->hashes = chm->hashes;
+    chm->hashes = NULL; //transfer memory ownership
+    chmf->n = chm->n;
+    chmf->m = chm->m;
+    mphf->data = chmf;
+    mphf->size = chm->m;
+    DEBUGP("Successfully generated minimal perfect hash\n");
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+    }
+    return mphf;
+
+fail:
+    free(visited);
+    free(chmf);
+    free(mphf);
+    free(chm->g);
+    chm->g = NULL;
+    if (chm->hashes != NULL)
+    {
+        for (i = 0; i < 3; ++i)
+        {
+            if (chm->hashes[i] != NULL) hash_state_destroy(chm->hashes[i]);
+        }
+        free(chm->hashes);
+        chm->hashes = NULL;
+    }
+    if (chm->graph != NULL)
+    {
+        graph_destroy(chm->graph);
+        chm->graph = NULL;
+    }
+    return NULL;
+}
+
+/* Recursive walk from vertex v: marks v in the `visited` bitmap, then gives
+ * every not-yet-visited neighbour the value (edge id - g[v]) and recurses
+ * into it. g[v] must already be set by the caller. */
+static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v)
+{
+    graph_iterator_t walker = graph_neighbors_it(chm->graph, v);
+    cmph_uint32 next = 0;
+
+    SETBIT(visited,v);
+    DEBUGP("Visiting vertex %u\n", v);
+
+    for (next = graph_next_neighbor(chm->graph, &walker);
+         next != GRAPH_NO_NEIGHBOR;
+         next = graph_next_neighbor(chm->graph, &walker))
+    {
+        DEBUGP("Visiting neighbor %u\n", next);
+        if (!GETBIT(visited,next))
+        {
+            DEBUGP("Visiting neighbor %u\n", next);
+            DEBUGP("Visiting edge %u->%u with id %u\n", v, next, graph_edge_id(chm->graph, v, next));
+            chm->g[next] = graph_edge_id(chm->graph, v, next) - chm->g[v];
+            DEBUGP("g is %u (%u - %u mod %u)\n", chm->g[next], graph_edge_id(chm->graph, v, next), chm->g[v], chm->m);
+            chm_traverse(chm, visited, next);
+        }
+    }
+}
+
+/* Rebuild the graph from the key source: each key becomes an edge between
+ * its two hash values taken modulo n. Returns 1 when the resulting graph is
+ * acyclic, 0 on a self loop or a cyclic graph. */
+static int chm_gen_edges(cmph_config_t *mph)
+{
+    chm_config_data_t *chm = (chm_config_data_t *)mph->data;
+    cmph_uint32 idx;
+    int cyclic = 0;
+
+    DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
+    graph_clear_edges(chm->graph);
+    mph->key_source->rewind(mph->key_source->data);
+    for (idx = 0; idx < mph->key_source->nkeys; ++idx)
+    {
+        char *key;
+        cmph_uint32 keylen;
+        cmph_uint32 src, dst;
+
+        mph->key_source->read(mph->key_source->data, &key, &keylen);
+        src = hash(chm->hashes[0], key, keylen) % chm->n;
+        dst = hash(chm->hashes[1], key, keylen) % chm->n;
+        /* Nudge the second endpoint on a collision, wrapping at n. */
+        if (src == dst && ++dst >= chm->n) dst = 0;
+        if (src == dst)
+        {
+            if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", idx);
+            mph->key_source->dispose(mph->key_source->data, key, keylen);
+            return 0;
+        }
+        DEBUGP("Adding edge: %u -> %u for key %s\n", src, dst, key);
+        mph->key_source->dispose(mph->key_source->data, key, keylen);
+        graph_add_edge(chm->graph, src, dst);
+    }
+
+    cyclic = graph_is_cyclic(chm->graph);
+    if (mph->verbosity && cyclic) fprintf(stderr, "Cyclic graph generated\n");
+    DEBUGP("Looking for cycles: %u\n", cyclic);
+
+    return !cyclic;
+}
+
+/* Write `size` bytes from `src` to `fd` as a single item. A zero-length
+ * request succeeds without touching the stream. Returns 1 on success. */
+static int chm_write_block(const void *src, size_t size, FILE *fd)
+{
+    if (size == 0) return 1;
+    return fwrite(src, size, (size_t)1, fd) == (size_t)1;
+}
+
+/**
+ * Serialise a CHM function to `fd`.
+ *
+ * Layout after the generic header written by __cmph_dump(): the hash count
+ * (2), then for each hash its state length and state bytes, then n, m and
+ * the n entries of g.
+ *
+ * \param mphf function whose data member is a chm_data_t
+ * \param fd   output stream
+ * \return 1 on success, 0 as soon as a write fails
+ */
+int chm_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    cmph_uint32 two = 2; //number of hash functions
+    cmph_uint32 i;
+    chm_data_t *data = (chm_data_t *)mphf->data;
+
+    __cmph_dump(mphf, fd);
+
+    if (!chm_write_block(&two, sizeof(cmph_uint32), fd)) return 0;
+    for (i = 0; i < two; ++i)
+    {
+        int ok;
+        hash_state_dump(data->hashes[i], &buf, &buflen);
+        DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+        ok = chm_write_block(&buflen, sizeof(cmph_uint32), fd)
+          && chm_write_block(buf, (size_t)buflen, fd);
+        /* The dump buffer is released on success and on failure. */
+        free(buf);
+        buf = NULL;
+        if (!ok) return 0;
+    }
+
+    if (!chm_write_block(&(data->n), sizeof(cmph_uint32), fd)) return 0;
+    if (!chm_write_block(&(data->m), sizeof(cmph_uint32), fd)) return 0;
+    if (!chm_write_block(data->g, sizeof(cmph_uint32)*data->n, fd)) return 0;
+    return 1;
+}
+
+/* Read the CHM-specific part of a dumped function from `f` and attach it to
+ * mphf->data: hash count, each hash state (length + bytes), n, m, then g.
+ * NOTE(review): fread/malloc results are not checked and nothing can be
+ * reported through the void return; a truncated stream leaves nhashes,
+ * buflen or n unset. chm_destroy() also assumes at least two hashes. */
+void chm_load(FILE *f, cmph_t *mphf)
+{
+    cmph_uint32 nhashes;
+    cmph_uint32 buflen;
+    cmph_uint32 i;
+    char *buf = NULL;
+    register size_t nbytes;
+    chm_data_t *chm = (chm_data_t *)malloc(sizeof(chm_data_t));
+
+    DEBUGP("Loading chm mphf\n");
+    mphf->data = chm;
+
+    nbytes = fread(&nhashes, sizeof(cmph_uint32), (size_t)1, f);
+    /* One extra slot holds the NULL terminator of the hash array. */
+    chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
+    chm->hashes[nhashes] = NULL;
+    DEBUGP("Reading %u hashes\n", nhashes);
+    i = 0;
+    while (i < nhashes)
+    {
+        nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+        DEBUGP("Hash state has %u bytes\n", buflen);
+        buf = (char *)malloc((size_t)buflen);
+        nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+        chm->hashes[i] = hash_state_load(buf, buflen);
+        free(buf);
+        ++i;
+    }
+
+    DEBUGP("Reading m and n\n");
+    nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f);
+    nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f);
+
+    chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n);
+    nbytes = fread(chm->g, chm->n*sizeof(cmph_uint32), (size_t)1, f);
+#ifdef DEBUG
+    fprintf(stderr, "G: ");
+    for (i = 0; i < chm->n; ++i) fprintf(stderr, "%u ", chm->g[i]);
+    fprintf(stderr, "\n");
+#endif
+}
+
+
+/* Evaluate the CHM function for `key`: hash it to two vertices (the second
+ * is bumped, with wrap-around, if both coincide) and return
+ * (g[h1] + g[h2]) % m. */
+cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    chm_data_t *fn = mphf->data;
+    cmph_uint32 first = hash(fn->hashes[0], key, keylen) % fn->n;
+    cmph_uint32 second = hash(fn->hashes[1], key, keylen) % fn->n;
+
+    DEBUGP("key: %s h1: %u h2: %u\n", key, first, second);
+    if (first == second)
+    {
+        ++second;
+        if (second >= fn->n) second = 0;
+    }
+    DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, fn->g[first], fn->g[second], fn->m);
+    return (fn->g[first] + fn->g[second]) % fn->m;
+}
+/* Release a CHM function: its g array, both hash states, the hash array,
+ * the chm_data_t block and finally the cmph_t itself. */
+void chm_destroy(cmph_t *mphf)
+{
+    chm_data_t *fn = (chm_data_t *)mphf->data;
+    cmph_uint32 k;
+
+    free(fn->g);
+    for (k = 0; k < 2; ++k)
+    {
+        hash_state_destroy(fn->hashes[k]);
+    }
+    free(fn->hashes);
+    free(fn);
+    free(mphf);
+}
+
+/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Pack a perfect hash function into a preallocated contiguous memory space pointed to by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ *
+ * Layout written: h1 type, packed h1 state, h2 type, packed h2 state, n, m,
+ * then the n entries of g. Every 32-bit word is stored with memcpy so that
+ * the write is valid whatever the alignment of the current offset, which
+ * depends on the packed size of the hash states.
+ */
+void chm_pack(cmph_t *mphf, void *packed_mphf)
+{
+    chm_data_t *data = (chm_data_t *)mphf->data;
+    cmph_uint8 *ptr = packed_mphf;
+    cmph_uint32 word;
+    cmph_uint32 i;
+
+    // packing h1 and h2: type tag followed by the packed state
+    for (i = 0; i < 2; ++i)
+    {
+        CMPH_HASH type = hash_get_type(data->hashes[i]);
+        word = (cmph_uint32)type;
+        memcpy(ptr, &word, sizeof(word));
+        ptr += sizeof(word);
+
+        hash_state_pack(data->hashes[i], ptr);
+        ptr += hash_state_packed_size(type);
+    }
+
+    // packing n
+    memcpy(ptr, &data->n, sizeof(data->n));
+    ptr += sizeof(data->n);
+
+    // packing m
+    memcpy(ptr, &data->m, sizeof(data->m));
+    ptr += sizeof(data->m);
+
+    // packing g
+    memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
+}
+
+/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return sizeof(CMPH_ALGO) plus four 32-bit words, the packed size of both hash states and the n entries of g
+ */
+cmph_uint32 chm_packed_size(cmph_t *mphf)
+{
+    chm_data_t *fn = (chm_data_t *)mphf->data;
+    CMPH_HASH first_type = hash_get_type(fn->hashes[0]);
+    CMPH_HASH second_type = hash_get_type(fn->hashes[1]);
+    size_t total = sizeof(CMPH_ALGO);
+
+    total += hash_state_packed_size(first_type);
+    total += hash_state_packed_size(second_type);
+    total += 4*sizeof(cmph_uint32);
+    total += sizeof(cmph_uint32)*fn->n;
+    return (cmph_uint32)total;
+}
+
+/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint8 *cursor = packed_mphf;
+    cmph_uint8 *h1_state;
+    cmph_uint8 *h2_state;
+    CMPH_HASH h1_type;
+    CMPH_HASH h2_type;
+    cmph_uint32 *words;
+    cmph_uint32 *g;
+    cmph_uint32 n, m, h1, h2;
+
+    /* First hash: 32-bit type tag followed by its packed state. */
+    h1_type = *((cmph_uint32 *)cursor);
+    cursor += 4;
+    h1_state = cursor;
+    cursor += hash_state_packed_size(h1_type);
+
+    /* Second hash, same layout. */
+    h2_type = *((cmph_uint32 *)cursor);
+    cursor += 4;
+    h2_state = cursor;
+
+    /* After the second state come n, m and then the g array. */
+    words = (cmph_uint32 *)(cursor + hash_state_packed_size(h2_type));
+    n = words[0];
+    m = words[1];
+    g = words + 2;
+
+    h1 = hash_packed(h1_state, h1_type, key, keylen) % n;
+    h2 = hash_packed(h2_state, h2_type, key, keylen) % n;
+    DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
+    if (h1 == h2 && ++h2 >= n) h2 = 0;
+    DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g[h1], g[h2], m);
+    return (g[h1] + g[h2]) % m;
+}
diff --git a/girepository/cmph/chm.h b/girepository/cmph/chm.h
new file mode 100644
index 00000000..341be29e
--- /dev/null
+++ b/girepository/cmph/chm.h
@@ -0,0 +1,42 @@
+#ifndef __CMPH_CHM_H__
+#define __CMPH_CHM_H__
+
+#include "cmph.h"
+
+typedef struct __chm_data_t chm_data_t;
+typedef struct __chm_config_data_t chm_config_data_t;
+
+chm_config_data_t *chm_config_new();
+void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void chm_config_destroy(cmph_config_t *mph);
+cmph_t *chm_new(cmph_config_t *mph, double c);
+
+void chm_load(FILE *f, cmph_t *mphf);
+int chm_dump(cmph_t *mphf, FILE *f);
+void chm_destroy(cmph_t *mphf);
+cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void chm_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 chm_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/chm_structs.h b/girepository/cmph/chm_structs.h
new file mode 100644
index 00000000..fcad1bc3
--- /dev/null
+++ b/girepository/cmph/chm_structs.h
@@ -0,0 +1,24 @@
+#ifndef __CMPH_CHM_STRUCTS_H__
+#define __CMPH_CHM_STRUCTS_H__
+
+#include "hash_state.h"
+
+struct __chm_data_t
+{
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ cmph_uint32 *g; // n values; chm_search returns (g[h1] + g[h2]) % m
+ hash_state_t **hashes; // hash states; chm.c uses hashes[0] and hashes[1]
+};
+
+struct __chm_config_data_t
+{
+ CMPH_HASH hashfuncs[2]; // hash types passed to hash_state_new() for hashes[0] and hashes[1]
+ cmph_uint32 m; //edges (words) count
+ cmph_uint32 n; //vertex count
+ graph_t *graph; // working graph, created and destroyed inside chm_new()
+ cmph_uint32 *g; // filled by chm_traverse(); handed over to chm_data_t when chm_new() succeeds
+ hash_state_t **hashes; // handed over to chm_data_t when chm_new() succeeds
+};
+
+#endif
diff --git a/girepository/cmph/cmph.c b/girepository/cmph/cmph.c
new file mode 100644
index 00000000..cba735f4
--- /dev/null
+++ b/girepository/cmph/cmph.c
@@ -0,0 +1,845 @@
+#include "cmph.h"
+#include "cmph_structs.h"
+#include "chm.h"
+#include "bmz.h"
+#include "bmz8.h"
+#include "brz.h"
+#include "fch.h"
+#include "bdz.h"
+#include "bdz_ph.h"
+#include "chd_ph.h"
+#include "chd.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+//#define DEBUG
+#include "debug.h"
+
+const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", "chd", NULL }; // indexed by mph->algo in DEBUGP calls; NOTE(review): order presumably mirrors the CMPH_ALGO enum -- confirm
+
+typedef struct
+{
+ void *vector; // caller-owned key array; cast to char ** or cmph_uint8 ** by the read callbacks
+ cmph_uint32 position; // index of the next key to read; reset to 0 by key_vector_rewind
+} cmph_vector_t;
+
+
+
+/**
+ * Support a vector of struct as the source of keys.
+ *
+ * E.g. The keys could be the fieldB's in a vector of struct rec where
+ * struct rec is defined as:
+ * struct rec {
+ * fieldA;
+ * fieldB;
+ * fieldC;
+ * }
+ */
+typedef struct
+{
+ void *vector; /* Pointer to the vector of struct (not owned by the adapter) */
+ cmph_uint32 position; /* Index of the next struct to read */
+ cmph_uint32 struct_size; /* The size of the struct, in bytes */
+ cmph_uint32 key_offset; /* The byte offset of the key in the struct */
+ cmph_uint32 key_len; /* The length of the key, in bytes */
+} cmph_struct_vector_t;
+
+
+static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys);
+static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source);
+
+static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys);
+static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source);
+
+/**
+ * Read the next newline-terminated key from a text stream.
+ *
+ * \param data   the FILE * to read from
+ * \param key    receives a heap buffer holding the key without its trailing
+ *               newline; released through key_nlfile_dispose()
+ * \param keylen receives the key length in bytes
+ * \return the key length, or -1 at end of file, on a read error or when the
+ *         buffer could not be grown
+ *
+ * Lines longer than BUFSIZ are assembled from several fgets() chunks. As in
+ * count_nlfile_keys(), a final line without a newline is not a key. On the
+ * end-of-file path any partial data stays in *key for the caller to dispose;
+ * only the allocation-failure path frees it here.
+ */
+static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
+{
+    FILE *fd = (FILE *)data;
+    *key = NULL;
+    *keylen = 0;
+    while(1)
+    {
+        char buf[BUFSIZ];
+        size_t chunk;
+        char *grown;
+        char *c = fgets(buf, BUFSIZ, fd);
+        if (c == NULL) return -1;
+        if (feof(fd)) return -1;
+        chunk = strlen(buf);
+        /* Grow through a temporary so the old buffer survives a failure. */
+        grown = (char *)realloc(*key, *keylen + chunk + 1);
+        if (grown == NULL)
+        {
+            free(*key);
+            *key = NULL;
+            *keylen = 0;
+            return -1;
+        }
+        *key = grown;
+        memcpy(*key + *keylen, buf, chunk);
+        *keylen += (cmph_uint32)chunk;
+        (*key)[*keylen] = 0;
+        /* chunk is 0 when the line starts with a NUL byte; never index buf[-1]. */
+        if (chunk == 0 || buf[chunk - 1] != '\n') continue;
+        break;
+    }
+    if ((*keylen) && (*key)[*keylen - 1] == '\n')
+    {
+        (*key)[(*keylen) - 1] = 0;
+        --(*keylen);
+    }
+    return (int)(*keylen);
+}
+
+/* Read the next key from a vector of length-prefixed byte records: each
+ * record starts with a cmph_uint32 length followed by that many key bytes.
+ * *key receives a malloc'ed copy of the bytes; returns the key length. */
+static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
+{
+    cmph_vector_t *source = (cmph_vector_t *)data;
+    cmph_uint8 **records = (cmph_uint8 **)source->vector;
+
+    memcpy(keylen, records[source->position], sizeof(*keylen));
+    *key = (char *)malloc((size_t)*keylen);
+    memcpy(*key, records[source->position] + sizeof(*keylen), (size_t)*keylen);
+    source->position += 1;
+    return (int)(*keylen);
+}
+
+/* Read the next fixed-length key from a vector of structs: key_len bytes at
+ * key_offset inside the struct at the current position. *key receives a
+ * malloc'ed copy; returns the key length. */
+static int key_struct_vector_read(void *data, char **key, cmph_uint32 *keylen)
+{
+    cmph_struct_vector_t *source = (cmph_struct_vector_t *)data;
+    char *base = (char *)source->vector;
+    char *field;
+
+    *keylen = source->key_len;
+    *key = (char *)malloc((size_t)*keylen);
+    field = base + (source->position * source->struct_size) + source->key_offset;
+    memcpy(*key, field, (size_t)*keylen);
+    source->position += 1;
+    return (int)(*keylen);
+}
+
+/* Read the next key from a vector of NUL-terminated strings. *key receives
+ * a malloc'ed copy including the terminator; returns the string length. */
+static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
+{
+    cmph_vector_t *source = (cmph_vector_t *)data;
+    char **strings = (char **)source->vector;
+    char *text = strings[source->position];
+
+    *keylen = (cmph_uint32)strlen(text);
+    *key = (char *)malloc((size_t)*keylen + 1);
+    strcpy(*key, text);
+    source->position += 1;
+    return (int)(*keylen);
+}
+
+
+static void key_nlfile_dispose(void *data, char *key, cmph_uint32 keylen) // dispose callback of the file adapters; data and keylen are unused
+{
+ free(key); // the key buffer was heap-allocated by key_nlfile_read
+}
+
+static void key_vector_dispose(void *data, char *key, cmph_uint32 keylen) // dispose callback shared by the byte, struct and string vector adapters; data and keylen are unused
+{
+ free(key); // the key buffer was malloc'ed by the matching read callback
+}
+
+static void key_nlfile_rewind(void *data) // rewind callback of the file adapters
+{
+ FILE *fd = (FILE *)data; // data is the FILE * stored by the adapter constructor
+ rewind(fd); // restart reading keys from the beginning of the stream
+}
+
+static void key_struct_vector_rewind(void *data) // rewind callback of the struct vector adapter
+{
+ cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
+ cmph_struct_vector->position = 0; // next read starts again at the first struct
+}
+
+static void key_vector_rewind(void *data) // rewind callback of the byte and string vector adapters
+{
+ cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+ cmph_vector->position = 0; // next read starts again at the first key
+}
+
+/**
+ * Count the newline-terminated lines of a text stream.
+ *
+ * The stream is rewound before and after counting. Lines longer than BUFSIZ
+ * arrive in several chunks and are counted once, when the chunk ending in
+ * '\n' is seen. A final line without a newline is not counted.
+ *
+ * \param fd stream to scan
+ * \return number of complete lines; counting stops at end of file or at the
+ *         first read error
+ */
+static cmph_uint32 count_nlfile_keys(FILE *fd)
+{
+    cmph_uint32 count = 0;
+    rewind(fd);
+    while(1)
+    {
+        char buf[BUFSIZ];
+        size_t len;
+        /* NULL means end of file or a read error. Stopping here avoids
+         * spinning forever on an error that never sets the EOF flag. */
+        if (fgets(buf, BUFSIZ, fd) == NULL) break;
+        if (feof(fd)) break;
+        len = strlen(buf);
+        /* len is 0 when the line starts with a NUL byte; never index buf[-1]. */
+        if (len == 0 || buf[len - 1] != '\n') continue;
+        ++count;
+    }
+    rewind(fd);
+    return count;
+}
+
+/* Create a key source over a newline-delimited text stream. The number of
+ * keys is obtained by scanning the whole stream with count_nlfile_keys(). */
+cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd)
+{
+    cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(*adapter));
+
+    assert(adapter);
+    adapter->data = (void *)keys_fd;
+    adapter->nkeys = count_nlfile_keys(keys_fd);
+    adapter->read = key_nlfile_read;
+    adapter->dispose = key_nlfile_dispose;
+    adapter->rewind = key_nlfile_rewind;
+    return adapter;
+}
+
+void cmph_io_nlfile_adapter_destroy(cmph_io_adapter_t * key_source) // releases the adapter only; the FILE it refers to is not closed here
+{
+ free(key_source);
+}
+
+/* Create a key source over a newline-delimited text stream whose key count
+ * is supplied by the caller, so the stream is not scanned here. */
+cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys)
+{
+    cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(*adapter));
+
+    assert(adapter);
+    adapter->data = (void *)keys_fd;
+    adapter->nkeys = nkeys;
+    adapter->read = key_nlfile_read;
+    adapter->dispose = key_nlfile_dispose;
+    adapter->rewind = key_nlfile_rewind;
+    return adapter;
+}
+
+void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source) // releases the adapter only; the FILE it refers to is not closed here
+{
+ free(key_source);
+}
+
+
+/* Allocate an adapter plus its cmph_struct_vector_t state for a vector of
+ * structs. The read/dispose/rewind callbacks are left for the caller to set. */
+static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
+{
+    cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(*adapter));
+    cmph_struct_vector_t *state = (cmph_struct_vector_t *)malloc(sizeof(*state));
+
+    assert(adapter);
+    assert(state);
+
+    state->vector = vector;
+    state->position = 0;
+    state->struct_size = struct_size;
+    state->key_offset = key_offset;
+    state->key_len = key_len;
+
+    adapter->data = (void *)state;
+    adapter->nkeys = nkeys;
+    return adapter;
+}
+
+static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source) // frees the adapter and its state, not the caller's vector
+{
+ cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)key_source->data;
+ cmph_struct_vector->vector = NULL; // drop the reference; the vector itself is never freed here
+ free(cmph_struct_vector);
+ free(key_source);
+}
+
+/* Allocate an adapter plus its cmph_vector_t state for a key vector. The
+ * read/dispose/rewind callbacks are left for the caller to set. */
+static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys)
+{
+    cmph_io_adapter_t *adapter = (cmph_io_adapter_t *)malloc(sizeof(*adapter));
+    cmph_vector_t *state = (cmph_vector_t *)malloc(sizeof(*state));
+
+    assert(adapter);
+    assert(state);
+
+    state->vector = vector;
+    state->position = 0;
+
+    adapter->data = (void *)state;
+    adapter->nkeys = nkeys;
+    return adapter;
+}
+
+static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source) // frees the adapter and its state, not the caller's vector
+{
+ cmph_vector_t *cmph_vector = (cmph_vector_t *)key_source->data;
+ cmph_vector->vector = NULL; // drop the reference; the vector itself is never freed here
+ free(cmph_vector);
+ free(key_source);
+}
+
+cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys) // key source over length-prefixed byte records (see key_byte_vector_read)
+{
+ cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
+ key_source->read = key_byte_vector_read;
+ key_source->dispose = key_vector_dispose;
+ key_source->rewind = key_vector_rewind;
+ return key_source;
+}
+void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source) // public wrapper around cmph_io_vector_destroy
+{
+ cmph_io_vector_destroy(key_source);
+}
+
+cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys) // key source over a fixed-length field of each struct in a vector
+{
+ cmph_io_adapter_t * key_source = cmph_io_struct_vector_new(vector, struct_size, key_offset, key_len, nkeys);
+ key_source->read = key_struct_vector_read;
+ key_source->dispose = key_vector_dispose;
+ key_source->rewind = key_struct_vector_rewind;
+ return key_source;
+}
+
+void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source) // public wrapper around cmph_io_struct_vector_destroy
+{
+ cmph_io_struct_vector_destroy(key_source);
+}
+
+cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys) // key source over a vector of NUL-terminated strings (see key_vector_read)
+{
+ cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
+ key_source->read = key_vector_read;
+ key_source->dispose = key_vector_dispose;
+ key_source->rewind = key_vector_rewind;
+ return key_source;
+}
+
+void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source) // public wrapper around cmph_io_vector_destroy
+{
+ cmph_io_vector_destroy(key_source);
+}
+
+/* Create a configuration bound to `key_source`, preset to the CHM
+ * algorithm with a fresh CHM-specific data block. */
+cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
+{
+    cmph_config_t *cfg = __config_new(key_source);
+
+    assert(cfg);
+    cfg->algo = CMPH_CHM; // default value
+    cfg->data = chm_config_new();
+    return cfg;
+}
+
+/* Switch the configuration to `algo`: the data block of the current
+ * algorithm is destroyed and a new one for `algo` is created. Nothing is
+ * done when `algo` is already selected. */
+void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
+{
+    /* Same algorithm: the stored value would only be rewritten with itself. */
+    if (algo == mph->algo) return;
+
+    /* Tear down the data of the algorithm being replaced. */
+    switch (mph->algo)
+    {
+        case CMPH_CHM:    chm_config_destroy(mph);    break;
+        case CMPH_BMZ:    bmz_config_destroy(mph);    break;
+        case CMPH_BMZ8:   bmz8_config_destroy(mph);   break;
+        case CMPH_BRZ:    brz_config_destroy(mph);    break;
+        case CMPH_FCH:    fch_config_destroy(mph);    break;
+        case CMPH_BDZ:    bdz_config_destroy(mph);    break;
+        case CMPH_BDZ_PH: bdz_ph_config_destroy(mph); break;
+        case CMPH_CHD_PH: chd_ph_config_destroy(mph); break;
+        case CMPH_CHD:    chd_config_destroy(mph);    break;
+        default:
+            assert(0);
+    }
+
+    /* Build the data of the newly selected algorithm. */
+    switch (algo)
+    {
+        case CMPH_CHM:    mph->data = chm_config_new();    break;
+        case CMPH_BMZ:    mph->data = bmz_config_new();    break;
+        case CMPH_BMZ8:   mph->data = bmz8_config_new();   break;
+        case CMPH_BRZ:    mph->data = brz_config_new();    break;
+        case CMPH_FCH:    mph->data = fch_config_new();    break;
+        case CMPH_BDZ:    mph->data = bdz_config_new();    break;
+        case CMPH_BDZ_PH: mph->data = bdz_ph_config_new(); break;
+        case CMPH_CHD_PH: mph->data = chd_ph_config_new(); break;
+        case CMPH_CHD:    mph->data = chd_config_new(mph); break;
+        default:
+            assert(0);
+    }
+
+    mph->algo = algo;
+}
+
+void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) // forwarded to BRZ only; silently ignored for every other algorithm
+{
+ if (mph->algo == CMPH_BRZ)
+ {
+ brz_config_set_tmp_dir(mph, tmp_dir);
+ }
+}
+
+
+void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd) // forwarded to BRZ only; silently ignored for every other algorithm
+{
+ if (mph->algo == CMPH_BRZ)
+ {
+ brz_config_set_mphf_fd(mph, mphf_fd);
+ }
+}
+
+/* Forward the parameter `b` to the algorithms that accept it (BRZ, BDZ,
+ * CHD_PH, CHD); the call has no effect for any other algorithm. */
+void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
+{
+    switch (mph->algo)
+    {
+        case CMPH_BRZ:
+            brz_config_set_b(mph, b);
+            break;
+        case CMPH_BDZ:
+            bdz_config_set_b(mph, b);
+            break;
+        case CMPH_CHD_PH:
+            chd_ph_config_set_b(mph, b);
+            break;
+        case CMPH_CHD:
+            chd_config_set_b(mph, b);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Forward `keys_per_bin` to CHD_PH or CHD; the call has no effect for any
+ * other algorithm. */
+void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
+{
+    switch (mph->algo)
+    {
+        case CMPH_CHD_PH:
+            chd_ph_config_set_keys_per_bin(mph, keys_per_bin);
+            break;
+        case CMPH_CHD:
+            chd_config_set_keys_per_bin(mph, keys_per_bin);
+            break;
+        default:
+            break;
+    }
+}
+
+void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability) // forwarded to BRZ only; silently ignored for every other algorithm
+{
+ if (mph->algo == CMPH_BRZ)
+ {
+ brz_config_set_memory_availability(mph, memory_availability);
+ }
+}
+
+/* Destroy a configuration: first the data block of the selected algorithm,
+ * then the generic part. A NULL configuration is accepted and ignored. */
+void cmph_config_destroy(cmph_config_t *mph)
+{
+    if (!mph) return;
+
+    DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
+    switch (mph->algo)
+    {
+        case CMPH_CHM:    chm_config_destroy(mph);    break;
+        case CMPH_BMZ:    bmz_config_destroy(mph);    break;
+        case CMPH_BMZ8:   bmz8_config_destroy(mph);   break;
+        case CMPH_BRZ:    brz_config_destroy(mph);    break;
+        case CMPH_FCH:    fch_config_destroy(mph);    break;
+        case CMPH_BDZ:    bdz_config_destroy(mph);    break;
+        case CMPH_BDZ_PH: bdz_ph_config_destroy(mph); break;
+        case CMPH_CHD_PH: chd_ph_config_destroy(mph); break;
+        case CMPH_CHD:    chd_config_destroy(mph);    break;
+        default:
+            assert(0);
+    }
+    __config_destroy(mph);
+}
+
+void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity) // non-zero enables the progress messages written to stderr (see chm.c)
+{
+ mph->verbosity = verbosity;
+}
+
+void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) // hands the hash list to the setter of the currently selected algorithm
+{
+ switch (mph->algo)
+ {
+ case CMPH_CHM:
+ chm_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_BMZ: /* included -- Fabiano */
+ bmz_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_BMZ8: /* included -- Fabiano */
+ bmz8_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_BRZ: /* included -- Fabiano */
+ brz_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_FCH: /* included -- Fabiano */
+ fch_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_BDZ: /* included -- Fabiano */
+ bdz_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_BDZ_PH: /* included -- Fabiano */
+ bdz_ph_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_CHD_PH: /* included -- Fabiano */
+ chd_ph_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ case CMPH_CHD: /* included -- Fabiano */
+ chd_config_set_hashfuncs(mph, hashfuncs);
+ break;
+ default: // unknown algorithm: silently ignored here, unlike the other dispatchers which assert
+ break;
+ }
+ return;
+}
+void cmph_config_set_graphsize(cmph_config_t *mph, double c) // stores c; cmph_new() later passes it to the algorithm constructor
+{
+ mph->c = c;
+ return;
+}
+
+/* Build a perfect hash function with the algorithm selected in `mph`,
+ * passing along the stored graph-size parameter c. Returns whatever the
+ * algorithm constructor returns; NULL for an unknown algorithm when
+ * assertions are compiled out. */
+cmph_t *cmph_new(cmph_config_t *mph)
+{
+    double c = mph->c;
+
+    DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);
+    switch (mph->algo)
+    {
+        case CMPH_CHM:
+            DEBUGP("Creating chm hash\n");
+            return chm_new(mph, c);
+        case CMPH_BMZ:
+            DEBUGP("Creating bmz hash\n");
+            return bmz_new(mph, c);
+        case CMPH_BMZ8:
+            DEBUGP("Creating bmz8 hash\n");
+            return bmz8_new(mph, c);
+        case CMPH_BRZ:
+            DEBUGP("Creating brz hash\n");
+            /* BRZ uses FCH internally when c >= 2.0 and BMZ8 otherwise. */
+            if (c >= 2.0)
+            {
+                brz_config_set_algo(mph, CMPH_FCH);
+            }
+            else
+            {
+                brz_config_set_algo(mph, CMPH_BMZ8);
+            }
+            return brz_new(mph, c);
+        case CMPH_FCH:
+            DEBUGP("Creating fch hash\n");
+            return fch_new(mph, c);
+        case CMPH_BDZ:
+            DEBUGP("Creating bdz hash\n");
+            return bdz_new(mph, c);
+        case CMPH_BDZ_PH:
+            DEBUGP("Creating bdz_ph hash\n");
+            return bdz_ph_new(mph, c);
+        case CMPH_CHD_PH:
+            DEBUGP("Creating chd_ph hash\n");
+            return chd_ph_new(mph, c);
+        case CMPH_CHD:
+            DEBUGP("Creating chd hash\n");
+            return chd_new(mph, c);
+        default:
+            assert(0);
+    }
+    return NULL;
+}
+
+int cmph_dump(cmph_t *mphf, FILE *f) // writes mphf to f through the dump routine of its algorithm and returns that routine's result
+{
+ switch (mphf->algo)
+ {
+ case CMPH_CHM:
+ return chm_dump(mphf, f);
+ case CMPH_BMZ: /* included -- Fabiano */
+ return bmz_dump(mphf, f);
+ case CMPH_BMZ8: /* included -- Fabiano */
+ return bmz8_dump(mphf, f);
+ case CMPH_BRZ: /* included -- Fabiano */
+ return brz_dump(mphf, f);
+ case CMPH_FCH: /* included -- Fabiano */
+ return fch_dump(mphf, f);
+ case CMPH_BDZ: /* included -- Fabiano */
+ return bdz_dump(mphf, f);
+ case CMPH_BDZ_PH: /* included -- Fabiano */
+ return bdz_ph_dump(mphf, f);
+ case CMPH_CHD_PH: /* included -- Fabiano */
+ return chd_ph_dump(mphf, f);
+ case CMPH_CHD: /* included -- Fabiano */
+ return chd_dump(mphf, f);
+ default:
+ assert(0);
+ }
+ assert(0); // only reachable for an unknown algorithm when the assert above is compiled out
+ return 0;
+}
+cmph_t *cmph_load(FILE *f) // reads the generic header with __cmph_load, then the part specific to the algorithm it names
+{
+ cmph_t *mphf = NULL;
+ DEBUGP("Loading mphf generic parts\n");
+ mphf = __cmph_load(f);
+ if (mphf == NULL) return NULL; // the only failure reported to the caller; the per-algorithm loaders return void
+ DEBUGP("Loading mphf algorithm dependent parts\n");
+
+ switch (mphf->algo)
+ {
+ case CMPH_CHM:
+ chm_load(f, mphf);
+ break;
+ case CMPH_BMZ: /* included -- Fabiano */
+ DEBUGP("Loading bmz algorithm dependent parts\n");
+ bmz_load(f, mphf);
+ break;
+ case CMPH_BMZ8: /* included -- Fabiano */
+ DEBUGP("Loading bmz8 algorithm dependent parts\n");
+ bmz8_load(f, mphf);
+ break;
+ case CMPH_BRZ: /* included -- Fabiano */
+ DEBUGP("Loading brz algorithm dependent parts\n");
+ brz_load(f, mphf);
+ break;
+ case CMPH_FCH: /* included -- Fabiano */
+ DEBUGP("Loading fch algorithm dependent parts\n");
+ fch_load(f, mphf);
+ break;
+ case CMPH_BDZ: /* included -- Fabiano */
+ DEBUGP("Loading bdz algorithm dependent parts\n");
+ bdz_load(f, mphf);
+ break;
+ case CMPH_BDZ_PH: /* included -- Fabiano */
+ DEBUGP("Loading bdz_ph algorithm dependent parts\n");
+ bdz_ph_load(f, mphf);
+ break;
+ case CMPH_CHD_PH: /* included -- Fabiano */
+ DEBUGP("Loading chd_ph algorithm dependent parts\n");
+ chd_ph_load(f, mphf);
+ break;
+ case CMPH_CHD: /* included -- Fabiano */
+ DEBUGP("Loading chd algorithm dependent parts\n");
+ chd_load(f, mphf);
+ break;
+ default:
+ assert(0);
+ }
+ DEBUGP("Loaded mphf\n");
+ return mphf;
+}
+
+
+/** Evaluate the function on a key by dispatching on mphf->algo.
+ * \param mphf function to evaluate
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return the value produced by the algorithm-specific search routine; 0
+ *         when the algorithm tag is unknown and assertions are compiled out
+ */
+cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint32 value = 0;
+
+    DEBUGP("mphf algorithm: %u \n", mphf->algo);
+    switch (mphf->algo)
+    {
+        case CMPH_CHM:
+            value = chm_search(mphf, key, keylen);
+            break;
+        case CMPH_BMZ:
+            DEBUGP("bmz algorithm search\n");
+            value = bmz_search(mphf, key, keylen);
+            break;
+        case CMPH_BMZ8:
+            DEBUGP("bmz8 algorithm search\n");
+            value = bmz8_search(mphf, key, keylen);
+            break;
+        case CMPH_BRZ:
+            DEBUGP("brz algorithm search\n");
+            value = brz_search(mphf, key, keylen);
+            break;
+        case CMPH_FCH:
+            DEBUGP("fch algorithm search\n");
+            value = fch_search(mphf, key, keylen);
+            break;
+        case CMPH_BDZ:
+            DEBUGP("bdz algorithm search\n");
+            value = bdz_search(mphf, key, keylen);
+            break;
+        case CMPH_BDZ_PH:
+            DEBUGP("bdz_ph algorithm search\n");
+            value = bdz_ph_search(mphf, key, keylen);
+            break;
+        case CMPH_CHD_PH:
+            DEBUGP("chd_ph algorithm search\n");
+            value = chd_ph_search(mphf, key, keylen);
+            break;
+        case CMPH_CHD:
+            DEBUGP("chd algorithm search\n");
+            value = chd_search(mphf, key, keylen);
+            break;
+        default:
+            /* unknown algorithm tag */
+            assert(0);
+    }
+    return value;
+}
+
+/** Return the size field stored in mphf.
+ * \param mphf function to inspect; must not be NULL (it is dereferenced)
+ */
+cmph_uint32 cmph_size(cmph_t *mphf)
+{
+ return mphf->size;
+}
+
+/** Release mphf through the destroy routine that matches mphf->algo.
+ * \param mphf function to release; must not be NULL (it is dereferenced)
+ */
+void cmph_destroy(cmph_t *mphf)
+{
+    switch (mphf->algo)
+    {
+        case CMPH_CHM:
+            chm_destroy(mphf);
+            break;
+        case CMPH_BMZ:
+            bmz_destroy(mphf);
+            break;
+        case CMPH_BMZ8:
+            bmz8_destroy(mphf);
+            break;
+        case CMPH_BRZ:
+            brz_destroy(mphf);
+            break;
+        case CMPH_FCH:
+            fch_destroy(mphf);
+            break;
+        case CMPH_BDZ:
+            bdz_destroy(mphf);
+            break;
+        case CMPH_BDZ_PH:
+            bdz_ph_destroy(mphf);
+            break;
+        case CMPH_CHD_PH:
+            chd_ph_destroy(mphf);
+            break;
+        case CMPH_CHD:
+            chd_destroy(mphf);
+            break;
+        default:
+            /* unknown algorithm tag */
+            assert(0);
+    }
+}
+
+
+/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Pack a perfect hash function into a preallocated contiguous memory area.
+ * \param mphf pointer to the function to be packed
+ * \param packed_mphf pointer to the destination memory area; its size must be
+ *        at least cmph_packed_size(). It is written through a cmph_uint32
+ *        pointer.
+ */
+void cmph_pack(cmph_t *mphf, void *packed_mphf)
+{
+    cmph_uint32 *header = (cmph_uint32 *) packed_mphf;
+    cmph_uint32 *payload = header + 1;
+
+    // the first word carries the algorithm tag; the algorithm data follows it
+    *header = mphf->algo;
+    DEBUGP("mphf->algo = %u\n", mphf->algo);
+    switch (mphf->algo)
+    {
+        case CMPH_CHM:
+            chm_pack(mphf, payload);
+            break;
+        case CMPH_BMZ:
+            bmz_pack(mphf, payload);
+            break;
+        case CMPH_BMZ8:
+            bmz8_pack(mphf, payload);
+            break;
+        case CMPH_BRZ:
+            brz_pack(mphf, payload);
+            break;
+        case CMPH_FCH:
+            fch_pack(mphf, payload);
+            break;
+        case CMPH_BDZ:
+            bdz_pack(mphf, payload);
+            break;
+        case CMPH_BDZ_PH:
+            bdz_ph_pack(mphf, payload);
+            break;
+        case CMPH_CHD_PH:
+            chd_ph_pack(mphf, payload);
+            break;
+        case CMPH_CHD:
+            chd_pack(mphf, payload);
+            break;
+        default:
+            /* unknown algorithm tag */
+            assert(0);
+    }
+}
+
+/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
+ * \brief Report the amount of space needed to pack mphf, as computed by the
+ *        packed-size routine that matches mphf->algo.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function, or zero for failures
+ */
+cmph_uint32 cmph_packed_size(cmph_t *mphf)
+{
+    cmph_uint32 bytes = 0; // stays 0 (FAILURE) for an unknown algorithm
+
+    switch (mphf->algo)
+    {
+        case CMPH_CHM:
+            bytes = chm_packed_size(mphf);
+            break;
+        case CMPH_BMZ:
+            bytes = bmz_packed_size(mphf);
+            break;
+        case CMPH_BMZ8:
+            bytes = bmz8_packed_size(mphf);
+            break;
+        case CMPH_BRZ:
+            bytes = brz_packed_size(mphf);
+            break;
+        case CMPH_FCH:
+            bytes = fch_packed_size(mphf);
+            break;
+        case CMPH_BDZ:
+            bytes = bdz_packed_size(mphf);
+            break;
+        case CMPH_BDZ_PH:
+            bytes = bdz_ph_packed_size(mphf);
+            break;
+        case CMPH_CHD_PH:
+            bytes = chd_ph_packed_size(mphf);
+            break;
+        case CMPH_CHD:
+            bytes = chd_packed_size(mphf);
+            break;
+        default:
+            assert(0);
+    }
+    return bytes;
+}
+
+/** cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf; its first cmph_uint32 word is
+ *        read as the algorithm tag and the remainder is handed to the
+ *        matching packed search routine
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value, or zero when the tag is not recognised
+ */
+cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint32 *header = (cmph_uint32 *)packed_mphf;
+    cmph_uint32 *payload = header + 1;
+    cmph_uint32 value = 0; // stays 0 (FAILURE) for an unknown algorithm
+
+    switch (*header)
+    {
+        case CMPH_CHM:
+            value = chm_search_packed(payload, key, keylen);
+            break;
+        case CMPH_BMZ:
+            value = bmz_search_packed(payload, key, keylen);
+            break;
+        case CMPH_BMZ8:
+            value = bmz8_search_packed(payload, key, keylen);
+            break;
+        case CMPH_BRZ:
+            value = brz_search_packed(payload, key, keylen);
+            break;
+        case CMPH_FCH:
+            value = fch_search_packed(payload, key, keylen);
+            break;
+        case CMPH_BDZ:
+            value = bdz_search_packed(payload, key, keylen);
+            break;
+        case CMPH_BDZ_PH:
+            value = bdz_ph_search_packed(payload, key, keylen);
+            break;
+        case CMPH_CHD_PH:
+            value = chd_ph_search_packed(payload, key, keylen);
+            break;
+        case CMPH_CHD:
+            value = chd_search_packed(payload, key, keylen);
+            break;
+        default:
+            assert(0);
+    }
+    return value;
+}
diff --git a/girepository/cmph/cmph.h b/girepository/cmph/cmph.h
new file mode 100644
index 00000000..1bc009e1
--- /dev/null
+++ b/girepository/cmph/cmph.h
@@ -0,0 +1,112 @@
+#ifndef __CMPH_H__
+#define __CMPH_H__
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "cmph_types.h"
+
+typedef struct __config_t cmph_config_t;
+typedef struct __cmph_t cmph_t;
+
+/* Key source description handed to cmph_config_new().
+ * NOTE(review): the field roles below are inferred from names and signatures
+ * only; the adapters that populate them are not defined in this header.
+ */
+typedef struct
+{
+ void *data; /* opaque pointer; presumably passed as the first argument of the callbacks -- TODO confirm */
+ cmph_uint32 nkeys; /* presumably the number of keys the source provides -- TODO confirm */
+ int (*read)(void *, char **, cmph_uint32 *); /* presumably yields the next key and its length -- TODO confirm */
+ void (*dispose)(void *, char *, cmph_uint32); /* presumably releases a key obtained from read -- TODO confirm */
+ void (*rewind)(void *); /* presumably restarts iteration over the keys -- TODO confirm */
+} cmph_io_adapter_t;
+
+/** Adapter pattern API **/
+/* please call free() in the created adapters */
+cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd);
+void cmph_io_nlfile_adapter_destroy(cmph_io_adapter_t * key_source);
+
+cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys);
+void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source);
+
+cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys);
+void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source);
+
+cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys);
+void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source);
+
+cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector,
+ cmph_uint32 struct_size,
+ cmph_uint32 key_offset,
+ cmph_uint32 key_len,
+ cmph_uint32 nkeys);
+
+void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source);
+
+/** Hash configuration API **/
+cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);
+void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity);
+void cmph_config_set_graphsize(cmph_config_t *mph, double c);
+void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
+void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
+void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
+void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b);
+void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
+void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
+void cmph_config_destroy(cmph_config_t *mph);
+
+/** Hash API **/
+cmph_t *cmph_new(cmph_config_t *mph);
+
+/** cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+ * \brief Computes the mphf value.
+ * \param mphf pointer to the resulting function
+ * \param key is the key to be hashed
+ * \param keylen is the key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+cmph_uint32 cmph_size(cmph_t *mphf);
+void cmph_destroy(cmph_t *mphf);
+
+/** Hash serialization/deserialization */
+int cmph_dump(cmph_t *mphf, FILE *f);
+cmph_t *cmph_load(FILE *f);
+
+/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the
+ * resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void cmph_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 cmph_packed_size(cmph_t *mphf);
+
+/** cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+// TIMING functions. To use the macro CMPH_TIMING must be defined
+#include "cmph_time.h"
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/girepository/cmph/cmph_structs.c b/girepository/cmph/cmph_structs.c
new file mode 100644
index 00000000..b5634248
--- /dev/null
+++ b/girepository/cmph/cmph_structs.c
@@ -0,0 +1,69 @@
+#include "cmph_structs.h"
+
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+/** Allocate a zero-initialised configuration bound to key_source.
+ * \param key_source key reader stored in the new configuration (not copied)
+ * \return the new configuration, or NULL when the allocation fails
+ */
+cmph_config_t *__config_new(cmph_io_adapter_t *key_source)
+{
+    /* calloc hands back zero-filled storage, so nothing touches the object
+     * before the allocation result has been checked. */
+    cmph_config_t *mph = (cmph_config_t *)calloc((size_t)1, sizeof(cmph_config_t));
+    if (mph == NULL) return NULL;
+    mph->key_source = key_source;
+    mph->verbosity = 0;
+    mph->data = NULL;
+    mph->c = 0;
+    return mph;
+}
+
+/** Release the configuration object itself with free().
+ *  Nothing reachable through its fields (key_source, data) is released here.
+ */
+void __config_destroy(cmph_config_t *mph)
+{
+ free(mph);
+}
+
+/** Write the algorithm-independent header of mphf to fd: the NUL-terminated
+ *  algorithm name taken from cmph_names, followed by the raw size field.
+ *  The fwrite results are not examined.
+ */
+void __cmph_dump(cmph_t *mphf, FILE *fd)
+{
+    const char *algo_name = cmph_names[mphf->algo];
+    size_t name_bytes = (size_t)(strlen(algo_name) + 1); /* terminator included */
+
+    (void)fwrite(algo_name, name_bytes, (size_t)1, fd);
+    (void)fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd);
+}
+/** Read the algorithm-independent header written by __cmph_dump(): a
+ *  NUL-terminated algorithm name followed by the raw size field.
+ * \param f stream to read from
+ * \return a newly allocated cmph_t with algo and size filled in and data and
+ *         key_source set to NULL; NULL when the stream ends early, the name
+ *         does not fit the local buffer, the name matches no entry of
+ *         cmph_names, or the allocation fails
+ */
+cmph_t *__cmph_load(FILE *f)
+{
+    cmph_t *mphf = NULL;
+    cmph_uint32 i;
+    char algo_name[BUFSIZ];
+    size_t len = 0;
+    CMPH_ALGO algo = CMPH_COUNT;
+
+    DEBUGP("Loading mphf\n");
+    for (;;)
+    {
+        /* the name comes from the stream, so bound it by the buffer size
+         * instead of trusting the stream to contain a terminator */
+        if (len >= sizeof(algo_name)) return NULL;
+        if (fread(&algo_name[len], (size_t)1, (size_t)1, f) != 1) return NULL;
+        if (algo_name[len] == 0) break;
+        ++len;
+    }
+    for (i = 0; i < CMPH_COUNT; ++i)
+    {
+        if (strcmp(algo_name, cmph_names[i]) == 0)
+        {
+            algo = (CMPH_ALGO)i;
+        }
+    }
+    if (algo == CMPH_COUNT)
+    {
+        DEBUGP("Algorithm %s not found\n", algo_name);
+        return NULL;
+    }
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    if (mphf == NULL) return NULL;
+    mphf->algo = algo;
+    mphf->key_source = NULL;
+    mphf->data = NULL;
+    /* a truncated stream must not yield an object with an indeterminate size */
+    if (fread(&(mphf->size), sizeof(mphf->size), (size_t)1, f) != 1)
+    {
+        free(mphf);
+        return NULL;
+    }
+    DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size);
+
+    return mphf;
+}
+
+
diff --git a/girepository/cmph/cmph_structs.h b/girepository/cmph/cmph_structs.h
new file mode 100644
index 00000000..88fafb6c
--- /dev/null
+++ b/girepository/cmph/cmph_structs.h
@@ -0,0 +1,33 @@
+#ifndef __CMPH_STRUCTS_H__
+#define __CMPH_STRUCTS_H__
+
+#include "cmph.h"
+
+/** Hash generation algorithm data
+ *  Created by __config_new(), which zero-fills the object and stores the
+ *  key source.
+ */
+struct __config_t
+{
+ CMPH_ALGO algo; // selected algorithm
+ cmph_io_adapter_t *key_source; // key reader handed to __config_new()
+ cmph_uint32 verbosity; // 0 after __config_new()
+ double c; // 0 after __config_new(); NOTE(review): presumably the value set by cmph_config_set_graphsize() -- confirm
+ void *data; // algorithm dependent data
+};
+
+/** Hash querying algorithm data
+ */
+struct __cmph_t
+{
+ CMPH_ALGO algo; // algorithm tag; its cmph_names entry is what __cmph_dump() writes
+ cmph_uint32 size; // written by __cmph_dump() and read back by __cmph_load()
+ cmph_io_adapter_t *key_source; // not assigned by __cmph_load()
+ void *data; // algorithm dependent data
+};
+
+cmph_config_t *__config_new(cmph_io_adapter_t *key_source);
+void __config_destroy(cmph_config_t*);
+void __cmph_dump(cmph_t *mphf, FILE *);
+cmph_t *__cmph_load(FILE *f);
+
+
+#endif
diff --git a/girepository/cmph/cmph_time.h b/girepository/cmph/cmph_time.h
new file mode 100644
index 00000000..d8018090
--- /dev/null
+++ b/girepository/cmph/cmph_time.h
@@ -0,0 +1,62 @@
+#ifdef ELAPSED_TIME_IN_SECONDS
+#undef ELAPSED_TIME_IN_SECONDS
+#endif
+
+#ifdef ELAPSED_TIME_IN_uSECONDS
+#undef ELAPSED_TIME_IN_uSECONDS
+#endif
+
+#ifdef WIN32
+// include headers to use gettimeofday
+#else
+ #ifdef __GNUC__
+ #include <sys/time.h>
+ #include <sys/resource.h>
+ #endif
+#endif
+
+#ifdef __GNUC__
+ #ifndef __CMPH_TIME_H__
+ #define __CMPH_TIME_H__
+ /* Store the current gettimeofday() reading in *elapsed_time as fractional
+  * seconds. *elapsed_time is left untouched when gettimeofday() fails. */
+ static inline void elapsed_time_in_seconds(double * elapsed_time)
+ {
+     struct timeval now;
+     if (gettimeofday(&now, NULL) >= 0) {
+         *elapsed_time = (double)now.tv_sec + ((double)now.tv_usec/1000000.0);
+     }
+ }
+ /* No-op stand-in that ELAPSED_TIME_IN_SECONDS can be mapped to. */
+ static inline void dummy_elapsed_time_in_seconds()
+ {
+ }
+ /* Store the current gettimeofday() reading in *elapsed_time as whole
+  * microseconds. *elapsed_time is left untouched when gettimeofday() fails. */
+ static inline void elapsed_time_in_useconds(cmph_uint64 * elapsed_time)
+ {
+     struct timeval e_time;
+     if (gettimeofday(&e_time, NULL) < 0) {
+         return;
+     }
+     /* widen before multiplying: tv_sec * 1000000 overflows when the type of
+      * tv_sec is only 32 bits wide */
+     *elapsed_time = (cmph_uint64)e_time.tv_sec * 1000000U + (cmph_uint64)e_time.tv_usec;
+ }
+ /* No-op stand-in that ELAPSED_TIME_IN_uSECONDS can be mapped to. */
+ static inline void dummy_elapsed_time_in_useconds()
+ {
+ }
+ #endif
+#endif
+
+/* Select what ELAPSED_TIME_IN_SECONDS / ELAPSED_TIME_IN_uSECONDS expand to.
+ * NOTE(review): the dummy_* helpers are defined only inside the __GNUC__
+ * section of this header, yet the non-__GNUC__ branches below refer to them
+ * -- confirm that non-GNU builds have a definition available. */
+#ifdef CMPH_TIMING
+ #ifdef __GNUC__
+ /* timing requested: use the gettimeofday()-based helpers */
+ #define ELAPSED_TIME_IN_SECONDS elapsed_time_in_seconds
+ #define ELAPSED_TIME_IN_uSECONDS elapsed_time_in_useconds
+ #else
+ #define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
+ #define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
+ #endif
+#else
+ #ifdef __GNUC__
+ /* timing disabled: the macros expand to nothing, leaving only the
+  * parenthesised argument list at the use site */
+ #define ELAPSED_TIME_IN_SECONDS
+ #define ELAPSED_TIME_IN_uSECONDS
+ #else
+ #define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
+ #define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
+ #endif
+#endif
diff --git a/girepository/cmph/cmph_types.h b/girepository/cmph/cmph_types.h
new file mode 100644
index 00000000..40f43329
--- /dev/null
+++ b/girepository/cmph/cmph_types.h
@@ -0,0 +1,42 @@
+#ifndef __CMPH_TYPES_H__
+#define __CMPH_TYPES_H__
+
+/* Integer aliases used throughout the library. The widths are implied by the
+ * names; the underlying types are plain C integer types. */
+typedef char cmph_int8;
+typedef unsigned char cmph_uint8;
+
+typedef short cmph_int16;
+typedef unsigned short cmph_uint16;
+
+typedef int cmph_int32;
+typedef unsigned int cmph_uint32;
+
+/* NOTE(review): this branch assumes `long` is 64 bits wide whenever __ia64 or
+ * __x86_64__ is defined -- confirm for targets where `long` stays 32 bits. */
+#if defined(__ia64) || defined(__x86_64__)
+ /** \typedef long cmph_int64;
+ * \brief 64-bit integer for a 64-bit architecture.
+ */
+ typedef long cmph_int64;
+
+ /** \typedef unsigned long cmph_uint64;
+ * \brief Unsigned 64-bit integer for a 64-bit architecture.
+ */
+ typedef unsigned long cmph_uint64;
+#else
+ /** \typedef long long cmph_int64;
+ * \brief 64-bit integer for a 32-bit architecture.
+ */
+ typedef long long cmph_int64;
+
+ /** \typedef unsigned long long cmph_uint64;
+ * \brief Unsigned 64-bit integer for a 32-bit architecture.
+ */
+ typedef unsigned long long cmph_uint64;
+#endif
+
+/* Hash function identifiers; CMPH_HASH_COUNT is the last enumerator. */
+typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH;
+extern const char *cmph_hash_names[];
+/* Algorithm identifiers. The enumerator values index cmph_names, and
+ * CMPH_COUNT (the last enumerator) doubles as the "not found" marker in
+ * __cmph_load(). */
+typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH,
+ CMPH_BDZ, CMPH_BDZ_PH,
+ CMPH_CHD_PH, CMPH_CHD, CMPH_COUNT } CMPH_ALGO;
+extern const char *cmph_names[];
+
+#endif
diff --git a/girepository/cmph/compressed_rank.c b/girepository/cmph/compressed_rank.c
new file mode 100644
index 00000000..822b2e15
--- /dev/null
+++ b/girepository/cmph/compressed_rank.c
@@ -0,0 +1,321 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<limits.h>
+#include<string.h>
+#include"compressed_rank.h"
+#include"bitbool.h"
+// #define DEBUG
+#include"debug.h"
+// Integer base-2 logarithm, rounded down; yields 0 for both x == 0 and x == 1.
+static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
+{
+    cmph_uint32 shifts;
+
+    // count how many halvings it takes to bring x down to 1 (or 0)
+    for (shifts = 0; x > 1; x >>= 1)
+    {
+        shifts++;
+    }
+    return shifts;
+}
+
+// Put cr into its empty state: zero counters, no remainder table, and a
+// freshly initialised select structure.
+void compressed_rank_init(compressed_rank_t * cr)
+{
+    cr->max_val = 0;
+    cr->n = 0;
+    cr->rem_r = 0;
+    select_init(&(cr->sel));
+    cr->vals_rems = NULL;
+}
+
+// Release the remainder table and the select structure owned by cr.
+// The compressed_rank_t object itself is not freed.
+void compressed_rank_destroy(compressed_rank_t * cr)
+{
+    free(cr->vals_rems);
+    cr->vals_rems = NULL;   // keeps a repeated destroy harmless for this field
+    select_destroy(&(cr->sel));
+}
+
+// Build the rank structure for the n values in vals_table.
+//
+// Each value is split at bit rem_r: the low rem_r bits go into vals_rems and
+// the high parts drive the vector handed to select_generate().
+//
+// Preconditions visible in the code: n must be at least 1 (vals_table[n - 1]
+// is read and n is used as a divisor). NOTE(review): the second loop appears
+// to rely on vals_table being sorted in non-decreasing order, with its last
+// element as the maximum -- confirm against the callers.
+// cr must have been initialised (e.g. by compressed_rank_init), because any
+// remainder table it already holds is released here.
+void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
+{
+    register cmph_uint32 i,j;
+    register cmph_uint32 rems_mask;
+    register cmph_uint32 * select_vec = 0;
+    cr->n = n;
+    cr->max_val = vals_table[cr->n - 1];
+    cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
+    if(cr->rem_r == 0)
+    {
+        cr->rem_r = 1;
+    }
+    select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
+    // drop a table left over from an earlier generate/load so that
+    // regenerating does not leak it
+    free(cr->vals_rems);
+    cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
+    rems_mask = (1U << cr->rem_r) - 1U;
+
+    // store the low rem_r bits of every value
+    for(i = 0; i < cr->n; i++)
+    {
+        set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
+    }
+
+    // for every quotient i, record the index of the first value whose high
+    // part is at least i
+    for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
+    {
+        while(i > (vals_table[j] >> cr->rem_r))
+        {
+            j++;
+        }
+        select_vec[i - 1] = j;
+    }
+
+
+    // FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
+    // I changed the select structure to work up to m, instead of up to m - 1.
+    select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
+
+    free(select_vec);
+}
+
+// Rank query on the in-memory structure.
+// Returns cr->n when idx is larger than cr->max_val. Otherwise idx is split
+// at bit rem_r; the select structure gives the starting position for the
+// high part, and the scan then advances until it meets a set bit in the
+// select bit vector or a stored remainder that is >= the low part of idx.
+// NOTE(review): presumably the result is the number of stored values smaller
+// than idx -- confirm.
+cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
+{
+    cmph_uint32 low_mask, high_part, low_part;
+    cmph_uint32 bit_pos = 0;
+    cmph_uint32 rank = 0;
+
+    if (idx > cr->max_val)
+    {
+        return cr->n;
+    }
+
+    high_part = idx >> cr->rem_r;
+    low_mask = (1U << cr->rem_r) - 1U;
+    low_part = idx & low_mask;
+    if (high_part != 0)
+    {
+        bit_pos = select_query(&cr->sel, high_part - 1) + 1;
+        rank = bit_pos - high_part;
+    }
+
+    for (;;)
+    {
+        if (GETBIT32(cr->sel.bits_vec, bit_pos)
+            || get_bits_value(cr->vals_rems, rank, cr->rem_r, low_mask) >= low_part)
+        {
+            break;
+        }
+        bit_pos++;
+        rank++;
+    }
+
+    return rank;
+}
+
+// Sum of the select structure's reported usage, the remainder table and three
+// cmph_uint32 header words. The latter two are counted as bytes * 8, i.e. in
+// bits.
+cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
+{
+    const cmph_uint32 sel_usage = select_get_space_usage(&cr->sel);
+    const cmph_uint32 rems_usage = BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
+    const cmph_uint32 header_usage = 3*(cmph_uint32)sizeof(cmph_uint32)*8;
+
+    return sel_usage + rems_usage + header_usage;
+}
+
+// Serialize cr into a newly allocated buffer.
+// Layout: max_val, n, rem_r, buflen_sel (one cmph_uint32 each), then the
+// select_dump() image (buflen_sel bytes), then the remainder table.
+// On success *buf owns the buffer (release it with free) and *buflen holds
+// its size in bytes. When the allocation fails, *buf is NULL and *buflen is
+// set to UINT_MAX.
+// NOTE(review): the outcome of select_dump() is not checked here.
+void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
+{
+    const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
+    cmph_uint32 sel_size = select_packed_size(&(cr->sel));
+    cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
+    cmph_uint32 pos = 0;
+    char *sel_image = 0;
+    cmph_uint32 buflen_sel = 0;
+    char *out;
+
+    *buflen = 4*word + sel_size + vals_rems_size;
+
+    DEBUGP("sel_size = %u\n", sel_size);
+    DEBUGP("vals_rems_size = %u\n", vals_rems_size);
+
+    out = (char *)calloc(*buflen, sizeof(char));
+    *buf = out;
+    if (out == NULL)
+    {
+        *buflen = UINT_MAX;
+        return;
+    }
+
+    // header: max_val, n and rem_r
+    memcpy(out + pos, &(cr->max_val), word);
+    pos += word;
+    DEBUGP("max_val = %u\n", cr->max_val);
+
+    memcpy(out + pos, &(cr->n), word);
+    pos += word;
+    DEBUGP("n = %u\n", cr->n);
+
+    memcpy(out + pos, &(cr->rem_r), word);
+    pos += word;
+    DEBUGP("rem_r = %u\n", cr->rem_r);
+
+    // select structure, preceded by its length
+    select_dump(&cr->sel, &sel_image, &buflen_sel);
+    memcpy(out + pos, &buflen_sel, word);
+    pos += word;
+    DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+    memcpy(out + pos, sel_image, buflen_sel);
+
+    #ifdef DEBUG
+    cmph_uint32 i = 0;
+    for(i = 0; i < buflen_sel; i++)
+    {
+        DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
+    }
+    #endif
+    pos += buflen_sel;
+
+    free(sel_image);
+
+    // remainder table
+    memcpy(out + pos, cr->vals_rems, vals_rems_size);
+    #ifdef DEBUG
+    for(i = 0; i < vals_rems_size; i++)
+    {
+        DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
+    }
+    #endif
+    pos += vals_rems_size;
+
+    DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
+}
+
+// Rebuild cr from an image produced by compressed_rank_dump().
+// Layout read: max_val, n, rem_r, buflen_sel (one cmph_uint32 each), the
+// select image (buflen_sel bytes), then the remainder table.
+// Any remainder table already held by cr is released first, so cr must be
+// initialised.
+// NOTE(review): buflen is only used for the debug message; the reads are not
+// bounds-checked against it.
+void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
+{
+    register cmph_uint32 pos = 0;
+    cmph_uint32 buflen_sel = 0;
+    register cmph_uint32 vals_rems_size = 0;
+
+    // loading max_val, n, and rem_r
+    memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
+    pos += (cmph_uint32)sizeof(cmph_uint32);
+    DEBUGP("max_val = %u\n", cr->max_val);
+
+    memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
+    pos += (cmph_uint32)sizeof(cmph_uint32);
+    DEBUGP("n = %u\n", cr->n);
+
+    memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
+    pos += (cmph_uint32)sizeof(cmph_uint32);
+    DEBUGP("rem_r = %u\n", cr->rem_r);
+
+    // loading sel
+    memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
+    pos += (cmph_uint32)sizeof(cmph_uint32);
+    DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+    select_load(&cr->sel, buf + pos, buflen_sel);
+    #ifdef DEBUG
+    cmph_uint32 i = 0;
+    for(i = 0; i < buflen_sel; i++)
+    {
+        DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
+    }
+    #endif
+    pos += buflen_sel;
+
+    // loading vals_rems
+    free(cr->vals_rems); // free(NULL) is a no-op, no guard needed
+    vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
+    cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
+    // convert the word count to bytes with the same element size that
+    // compressed_rank_dump() uses
+    vals_rems_size *= (cmph_uint32)sizeof(cmph_uint32);
+    if(cr->vals_rems)
+    {
+        // never copy into a failed allocation
+        memcpy(cr->vals_rems, buf + pos, vals_rems_size);
+    }
+
+    #ifdef DEBUG
+    for(i = 0; i < vals_rems_size; i++)
+    {
+        DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
+    }
+    #endif
+    pos += vals_rems_size;
+
+    DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
+}
+
+
+
+// Copy the serialized form of cr into the caller-provided area cr_packed,
+// which must hold at least compressed_rank_packed_size(cr) bytes.
+// Does nothing when either pointer is NULL or when the temporary dump buffer
+// cannot be allocated.
+void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen = 0;
+
+    if (!cr || !cr_packed)
+    {
+        return;
+    }
+    compressed_rank_dump(cr, &buf, &buflen);
+    // On allocation failure compressed_rank_dump() leaves buf NULL and sets
+    // buflen to UINT_MAX; copying in that state would read through NULL.
+    if (buf == NULL)
+    {
+        return;
+    }
+    memcpy(cr_packed, buf, buflen);
+    free(buf);
+}
+
+// Number of bytes compressed_rank_pack() writes for cr: four cmph_uint32
+// header words, the packed select structure and the remainder table.
+cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
+{
+    const cmph_uint32 header_bytes = 4 * (cmph_uint32)sizeof(cmph_uint32);
+    const cmph_uint32 sel_bytes = select_packed_size(&cr->sel);
+    const cmph_uint32 rems_bytes = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
+
+    return header_bytes + sel_bytes + rems_bytes;
+}
+
+// Rank query evaluated directly on a packed image.
+// Words 0..3 of cr_packed are max_val, n, rem_r and buflen_sel; the packed
+// select structure follows, and the remainder table starts buflen_sel bytes
+// after it. The computation mirrors compressed_rank_query().
+cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
+{
+    // unpacking cr_packed
+    cmph_uint32 *words = (cmph_uint32 *)cr_packed;
+    cmph_uint32 max_val = words[0];
+    cmph_uint32 n = words[1];
+    cmph_uint32 rem_r = words[2];
+    cmph_uint32 buflen_sel = words[3];
+    cmph_uint32 *sel_packed = words + 4;
+    cmph_uint32 *bits_vec = sel_packed + 2; // skipping n and m
+    cmph_uint32 *vals_rems = sel_packed + (buflen_sel >> 2);
+
+    // compressed sequence query computation
+    cmph_uint32 low_mask, high_part, low_part;
+    cmph_uint32 bit_pos = 0;
+    cmph_uint32 rank = 0;
+
+    if (idx > max_val)
+    {
+        return n;
+    }
+
+    high_part = idx >> rem_r;
+    low_mask = (1U << rem_r) - 1U;
+    low_part = idx & low_mask;
+    if (high_part != 0)
+    {
+        bit_pos = select_query_packed(sel_packed, high_part - 1) + 1;
+        rank = bit_pos - high_part;
+    }
+
+    for (;;)
+    {
+        if (GETBIT32(bits_vec, bit_pos)
+            || get_bits_value(vals_rems, rank, rem_r, low_mask) >= low_part)
+        {
+            break;
+        }
+        bit_pos++;
+        rank++;
+    }
+
+    return rank;
+}
+
+
+
diff --git a/girepository/cmph/compressed_rank.h b/girepository/cmph/compressed_rank.h
new file mode 100644
index 00000000..bfe930dd
--- /dev/null
+++ b/girepository/cmph/compressed_rank.h
@@ -0,0 +1,55 @@
+#ifndef __CMPH_COMPRESSED_RANK_H__
+#define __CMPH_COMPRESSED_RANK_H__
+
+#include "select.h"
+
+// State of a compressed rank structure.
+struct _compressed_rank_t
+{
+ cmph_uint32 max_val; // queries with an index above this return n
+ cmph_uint32 n; // number of values stored in vals_rems
+ // The length in bits of each value is decomposed into two components: the lg(n) MSBs are stored in rank_select data structure
+ // the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
+ cmph_uint32 rem_r;
+ select_t sel;
+ cmph_uint32 * vals_rems; // heap-allocated table of remainders
+};
+
+typedef struct _compressed_rank_t compressed_rank_t;
+
+void compressed_rank_init(compressed_rank_t * cr);
+
+void compressed_rank_destroy(compressed_rank_t * cr);
+
+void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n);
+
+cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx);
+
+cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr);
+
+void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen);
+
+void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen);
+
+
+/** \fn void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
+ * \brief Support the ability to pack a compressed_rank structure into a preallocated contiguous memory space pointed by cr_packed.
+ * \param cr points to the compressed_rank structure
+ * \param cr_packed pointer to the contiguous memory area used to store the compressed_rank structure. The size of cr_packed must be at least @see compressed_rank_packed_size
+ */
+void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
+
+/** \fn cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
+ * \brief Return the amount of space needed to pack a compressed_rank structure.
+ * \return the size of the packed compressed_rank structure or zero for failures
+ */
+cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
+
+
+/** \fn cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
+ * \param cr_packed is a pointer to a contiguous memory area
+ * \param idx is an index to compute the rank
+ * \return an integer that represents the compressed_rank value.
+ */
+cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
+
+#endif
diff --git a/girepository/cmph/compressed_seq.c b/girepository/cmph/compressed_seq.c
new file mode 100644
index 00000000..e558196d
--- /dev/null
+++ b/girepository/cmph/compressed_seq.c
@@ -0,0 +1,378 @@
+#include "compressed_seq.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+#include "bitbool.h"
+
+// #define DEBUG
+#include "debug.h"
+
+/* Integer floor(log2(x)), computed by counting halvings; x == 0 and x == 1 both give 0. */
+static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
+{
+    cmph_uint32 shifts;
+
+    for (shifts = 0; x > 1; x >>= 1)
+    {
+        shifts++;
+    }
+    return shifts;
+}
+
+/* Reset cs to the empty state: zero counters, NULL tables, and select_init applied to cs->sel. */
+void compressed_seq_init(compressed_seq_t * cs)
+{
+    cs->store_table = NULL;
+    cs->length_rems = NULL;
+    cs->total_length = 0;
+    cs->rem_r = cs->n = 0;
+    select_init(&cs->sel);
+}
+
+/* Free both bit tables of cs (their pointers are cleared afterwards) and destroy cs->sel. */
+void compressed_seq_destroy(compressed_seq_t * cs)
+{
+    free(cs->store_table);
+    free(cs->length_rems);
+    cs->store_table = cs->length_rems = NULL;
+    select_destroy(&cs->sel);
+}
+
+
+/* Build cs from the n values of vals_table. A non-zero value v is stored as v - (2^len - 1) in len = floor(log2(v + 1)) bits */
+/* of store_table; zero values occupy no bits. Running bit offsets are split into rem_r low bits (length_rems) and high bits fed to cs->sel. */
+void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
+{
+    register cmph_uint32 i;
+    // lengths: represents lengths of encoded values
+    register cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
+    register cmph_uint32 rems_mask;
+    register cmph_uint32 stored_value;
+
+    cs->n = n;
+    cs->total_length = 0;
+    // First pass: bit length of every value and total size of store_table.
+    for(i = 0; i < cs->n; i++)
+    {
+        if(vals_table[i] == 0)
+        {
+            lengths[i] = 0;
+        }
+        else
+        {
+            lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
+            cs->total_length += lengths[i];
+        }
+    }
+
+    free(cs->store_table); // free(NULL) is a no-op, so no guard is needed
+    cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
+    cs->total_length = 0;
+
+    // Second pass: append the encoded values back to back.
+    for(i = 0; i < cs->n; i++)
+    {
+        if(vals_table[i] == 0)
+            continue;
+        stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
+        set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
+        cs->total_length += lengths[i];
+    }
+
+    // rem_r = floor(log2(average encoded length)), at least 1. An empty sequence has no
+    // average, so the division is skipped instead of dividing by zero.
+    cs->rem_r = (cs->n == 0) ? 0 : compressed_seq_i_log2(cs->total_length/cs->n);
+
+    if(cs->rem_r == 0)
+    {
+        cs->rem_r = 1;
+    }
+
+    free(cs->length_rems);
+    cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));
+
+    rems_mask = (1U << cs->rem_r) - 1U;
+    cs->total_length = 0;
+
+    // Third pass: keep the low rem_r bits of each cumulative offset in length_rems and
+    // reuse lengths[] for the high bits that are handed to the select structure.
+    for(i = 0; i < cs->n; i++)
+    {
+        cs->total_length += lengths[i];
+        set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
+        lengths[i] = cs->total_length >> cs->rem_r;
+    }
+
+    select_init(&cs->sel);
+
+    // FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
+    // I changed the select structure to work up to m, instead of up to m - 1.
+    select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));
+
+    free(lengths);
+}
+
+/* Size of cs in bits: four cmph_uint32 header words, both bit tables, and the select structure. */
+cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
+{
+    const cmph_uint32 word_bits = (cmph_uint32)sizeof(cmph_uint32) * 8;
+    const cmph_uint32 table_words = ((cs->total_length + 31) >> 5) + BITS_TABLE_SIZE(cs->n, cs->rem_r);
+    return (4 + table_words) * word_bits + select_get_space_usage(&cs->sel);
+}
+
+cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx) // returns the value stored at position idx of cs
+{
+ register cmph_uint32 enc_idx, enc_length; // bit offset and bit length of the encoded value inside store_table
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 stored_value;
+ register cmph_uint32 sel_res;
+ // idx must be below cs->n; this is only enforced when assertions are compiled in
+ assert(idx < cs->n); // FABIANO ADDED
+
+ rems_mask = (1U << cs->rem_r) - 1U;
+ // Start offset: 0 for the first element, otherwise the cumulative length recorded for idx - 1
+ if(idx == 0)
+ {
+ enc_idx = 0;
+ sel_res = select_query(&cs->sel, idx);
+ }
+ else
+ {
+ sel_res = select_query(&cs->sel, idx - 1);
+ // high bits are derived from the select result, low rem_r bits come from length_rems
+ enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
+ enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);
+ // presumably yields the select answer for idx, starting from the one for idx - 1 -- confirm in select.c
+ sel_res = select_next_query(&cs->sel, sel_res);
+ };
+ // cumulative length through idx, minus the start offset, is the bit length of this value
+ enc_length = (sel_res - idx) << cs->rem_r;
+ enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
+ enc_length -= enc_idx;
+ if(enc_length == 0)
+ return 0;
+ // add back the (2^enc_length - 1) bias that compressed_seq_generate subtracts before storing
+ stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
+ return stored_value + ((1U << enc_length) - 1U);
+};
+
+void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen) // serialises cs into a calloc'ed buffer returned via *buf / *buflen
+{
+ register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
+ register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4; // words * 4 = bytes
+ register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4; // words * 4 = bytes
+ register cmph_uint32 pos = 0; // write cursor inside *buf
+ char * buf_sel = 0;
+ cmph_uint32 buflen_sel = 0;
+ // Layout: n, rem_r, total_length, buflen_sel (one cmph_uint32 each), select dump, length_rems, store_table
+ *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;
+
+ DEBUGP("sel_size = %u\n", sel_size);
+ DEBUGP("length_rems_size = %u\n", length_rems_size);
+ DEBUGP("store_table_size = %u\n", store_table_size);
+ *buf = (char *)calloc(*buflen, sizeof(char));
+ // Allocation failure is reported as *buf == NULL together with *buflen == UINT_MAX
+ if (!*buf)
+ {
+ *buflen = UINT_MAX;
+ return;
+ }
+
+ // dumping n, rem_r and total_length
+ memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("n = %u\n", cs->n);
+
+ memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("rem_r = %u\n", cs->rem_r);
+
+ memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("total_length = %u\n", cs->total_length);
+
+ // NOTE(review): *buflen was sized with select_packed_size(); this assumes select_dump emits exactly that many bytes -- confirm
+ // dumping sel
+ select_dump(&cs->sel, &buf_sel, &buflen_sel);
+ memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+ memcpy(*buf + pos, buf_sel, buflen_sel);
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ for(i = 0; i < buflen_sel; i++)
+ {
+ DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
+ }
+ #endif
+ pos += buflen_sel;
+ // the temporary buffer produced by select_dump is no longer needed once copied
+ free(buf_sel);
+
+ // dumping length_rems
+ memcpy(*buf + pos, cs->length_rems, length_rems_size);
+ #ifdef DEBUG
+ for(i = 0; i < length_rems_size; i++)
+ {
+ DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
+ }
+ #endif
+ pos += length_rems_size;
+
+ // dumping store_table
+ memcpy(*buf + pos, cs->store_table, store_table_size);
+
+ #ifdef DEBUG
+ for(i = 0; i < store_table_size; i++)
+ {
+ DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
+ }
+ #endif
+ DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
+}
+
+void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen) // rebuilds cs from the layout written by compressed_seq_dump
+{
+ register cmph_uint32 pos = 0; // read cursor inside buf
+ cmph_uint32 buflen_sel = 0;
+ register cmph_uint32 length_rems_size = 0;
+ register cmph_uint32 store_table_size = 0;
+ // NOTE(review): buflen is only used in the final debug message; sizes read from buf are not validated against it
+ // loading n, rem_r and total_length
+ memcpy(&(cs->n), buf, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("n = %u\n", cs->n);
+
+ memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("rem_r = %u\n", cs->rem_r);
+
+ memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("total_length = %u\n", cs->total_length);
+
+ // loading sel
+ memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+ select_load(&cs->sel, buf + pos, buflen_sel);
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ for(i = 0; i < buflen_sel; i++)
+ {
+ DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
+ }
+ #endif
+ pos += buflen_sel;
+
+ // loading length_rems
+ if(cs->length_rems)
+ {
+ free(cs->length_rems);
+ }
+ length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r); // size in words, used for the allocation
+ cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
+ length_rems_size *= 4; // now in bytes, for memcpy and the cursor
+ memcpy(cs->length_rems, buf + pos, length_rems_size);
+ // NOTE(review): the calloc result above is used without a NULL check
+ #ifdef DEBUG
+ for(i = 0; i < length_rems_size; i++)
+ {
+ DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
+ }
+ #endif
+ pos += length_rems_size;
+
+ // loading store_table
+ store_table_size = ((cs->total_length + 31) >> 5); // size in words, used for the allocation
+ if(cs->store_table)
+ {
+ free(cs->store_table);
+ }
+ cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
+ store_table_size *= 4; // now in bytes, for memcpy
+ memcpy(cs->store_table, buf + pos, store_table_size);
+ // NOTE(review): the calloc result above is used without a NULL check
+ #ifdef DEBUG
+ for(i = 0; i < store_table_size; i++)
+ {
+ DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
+ }
+ #endif
+
+ DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
+}
+
+/* Serialise cs into the caller-provided area cs_packed, which must hold at least compressed_seq_packed_size(cs) bytes. */
+void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen = 0;
+
+    if (!cs || !cs_packed) return;
+    compressed_seq_dump(cs, &buf, &buflen);
+    // On allocation failure compressed_seq_dump leaves buf NULL and sets buflen to UINT_MAX; nothing can be copied then.
+    if (buf) memcpy(cs_packed, buf, buflen);
+    free(buf);
+}
+
+/* Bytes needed to pack cs: four cmph_uint32 header words, both bit tables, and the packed select structure. */
+cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
+{
+    const cmph_uint32 word_bytes = (cmph_uint32)sizeof(cmph_uint32);
+    const cmph_uint32 table_words = ((cs->total_length + 31) >> 5) + BITS_TABLE_SIZE(cs->n, cs->rem_r);
+    return (4 + table_words) * word_bytes + select_packed_size(&cs->sel);
+}
+
+
+cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx) // same lookup as compressed_seq_query, reading the packed layout directly
+{
+ // unpacking cs_packed: header words n, rem_r, total_length, buflen_sel, then select data, length_rems, store_table
+ register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
+ register cmph_uint32 n = *ptr++;
+ register cmph_uint32 rem_r = *ptr++;
+ ptr++; // skipping total_length
+// register cmph_uint32 total_length = *ptr++;
+ register cmph_uint32 buflen_sel = *ptr++;
+ register cmph_uint32 * sel_packed = ptr;
+ register cmph_uint32 * length_rems = (ptr += (buflen_sel >> 2)); // buflen_sel is in bytes; >> 2 converts it to 32-bit words
+ register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(n, rem_r); // in words
+ register cmph_uint32 * store_table = (ptr += length_rems_size);
+
+ // compressed sequence query computation
+ register cmph_uint32 enc_idx, enc_length; // bit offset and bit length of the encoded value inside store_table
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 stored_value;
+ register cmph_uint32 sel_res;
+ // note: unlike compressed_seq_query, idx is not asserted to be below n here
+ rems_mask = (1U << rem_r) - 1U;
+
+ if(idx == 0)
+ {
+ enc_idx = 0;
+ sel_res = select_query_packed(sel_packed, idx);
+ }
+ else
+ {
+ sel_res = select_query_packed(sel_packed, idx - 1);
+ // high bits are derived from the select result, low rem_r bits come from length_rems
+ enc_idx = (sel_res - (idx - 1)) << rem_r;
+ enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);
+
+ sel_res = select_next_query_packed(sel_packed, sel_res);
+ };
+ // cumulative length through idx, minus the start offset, is the bit length of this value
+ enc_length = (sel_res - idx) << rem_r;
+ enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
+ enc_length -= enc_idx;
+ if(enc_length == 0)
+ return 0;
+ // add back the (2^enc_length - 1) bias that compressed_seq_generate subtracts before storing
+ stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
+ return stored_value + ((1U << enc_length) - 1U);
+}
diff --git a/girepository/cmph/compressed_seq.h b/girepository/cmph/compressed_seq.h
new file mode 100644
index 00000000..8d87fc70
--- /dev/null
+++ b/girepository/cmph/compressed_seq.h
@@ -0,0 +1,84 @@
+#ifndef __CMPH_COMPRESSED_SEQ_H__
+#define __CMPH_COMPRESSED_SEQ_H__
+
+#include"select.h"
+
+struct _compressed_seq_t
+{
+ cmph_uint32 n; // number of values stored in store_table
+ // The length in bits of each value is decomposed into two components: the lg(n) MSBs are stored in the rank_select data structure
+ // the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
+ cmph_uint32 rem_r;
+ cmph_uint32 total_length; // total length in bits of stored_table
+ select_t sel;
+ cmph_uint32 * length_rems;
+ cmph_uint32 * store_table;
+};
+
+typedef struct _compressed_seq_t compressed_seq_t;
+
+/** \fn void compressed_seq_init(compressed_seq_t * cs);
+ * \brief Initialize a compressed sequence structure.
+ * \param cs points to the compressed sequence structure to be initialized
+ */
+void compressed_seq_init(compressed_seq_t * cs);
+
+/** \fn void compressed_seq_destroy(compressed_seq_t * cs);
+ * \brief Destroy a compressed sequence given as input.
+ * \param cs points to the compressed sequence structure to be destroyed
+ */
+void compressed_seq_destroy(compressed_seq_t * cs);
+
+/** \fn void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
+ * \brief Generate a compressed sequence from an input array with n values.
+ * \param cs points to the compressed sequence structure
+ * \param vals_table pointer to the array given as input
+ * \param n number of values in @see vals_table
+ */
+void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
+
+
+/** \fn cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
+ * \brief Returns the value stored at index @see idx of the compressed sequence structure.
+ * \param cs points to the compressed sequence structure
+ * \param idx index to retrieve the value from
+ * \return the value stored at index @see idx of the compressed sequence structure
+ */
+cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
+
+
+/** \fn cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
+ * \brief Returns amount of space (in bits) to store the compressed sequence.
+ * \param cs points to the compressed sequence structure
+ * \return the amount of space (in bits) to store @see cs
+ */
+cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
+
+void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen);
+
+void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen);
+
+
+/** \fn void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
+ * \brief Support the ability to pack a compressed sequence structure into a preallocated contiguous memory space pointed by cs_packed.
+ * \param cs points to the compressed sequence structure
+ * \param cs_packed pointer to the contiguous memory area used to store the compressed sequence structure. The size of cs_packed must be at least @see compressed_seq_packed_size
+ */
+void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
+
+/** \fn cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
+ * \brief Return the amount of space needed to pack a compressed sequence structure.
+ * \return the size of the packed compressed sequence structure or zero for failures
+ */
+cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
+
+
+/** \fn cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
+ * \brief Returns the value stored at index @see idx of the packed compressed sequence structure.
+ * \param cs_packed is a pointer to a contiguous memory area
+ * \param idx is the index to retrieve the value from
+ * \return the value stored at index @see idx of the packed compressed sequence structure
+ */
+cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
+
+#endif
diff --git a/girepository/cmph/debug.h b/girepository/cmph/debug.h
new file mode 100644
index 00000000..0f7ddb13
--- /dev/null
+++ b/girepository/cmph/debug.h
@@ -0,0 +1,53 @@
+#ifdef DEBUGP
+#undef DEBUGP
+#endif
+
+#ifdef __cplusplus
+#include <cstdio>
+#ifdef WIN32
+#include <cstring>
+#endif
+#else
+#include <stdio.h>
+#ifdef WIN32
+#include <string.h>
+#endif
+#endif
+
+#ifndef __GNUC__
+#ifndef __DEBUG_H__
+#define __DEBUG_H__
+#include <stdarg.h>
+/*
+ * Non-GCC fallback for DEBUGP when DEBUG is defined: prints format and its arguments to stderr.
+ * No "%s:%d " prefix is prepended: a function cannot see the caller's __FILE__/__LINE__, and
+ * those two conversions would consume arguments that DEBUGP callers never pass.
+ */
+static void debugprintf(const char *format, ...)
+{
+    va_list ap;
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+}
+/* Non-GCC fallback for DEBUGP when DEBUG is not defined: accepts any arguments and ignores them. */
+static void dummyprintf(const char *format, ...)
+{
+    (void)format;
+}
+#endif
+#endif
+
+#ifdef DEBUG
+#ifndef __GNUC__
+#define DEBUGP debugprintf
+#else
+#define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0)
+#endif
+#else
+#ifndef __GNUC__
+#define DEBUGP dummyprintf
+#else
+#define DEBUGP(args...)
+#endif
+#endif
diff --git a/girepository/cmph/djb2_hash.c b/girepository/cmph/djb2_hash.c
new file mode 100644
index 00000000..d3b4330a
--- /dev/null
+++ b/girepository/cmph/djb2_hash.c
@@ -0,0 +1,49 @@
+#include "djb2_hash.h"
+#include <stdlib.h>
+
+djb2_state_t *djb2_state_new() // new state tagged CMPH_HASH_DJB2, or NULL when malloc fails
+{
+    djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
+    if (state) state->hashfunc = CMPH_HASH_DJB2; // never write through a failed allocation
+    return state;
+}
+
+void djb2_state_destroy(djb2_state_t *state) // releases a state allocated by djb2_state_new, djb2_state_copy or djb2_state_load
+{
+ free(state);
+}
+
+/* DJB2 (xor variant) of the first keylen bytes of k: starts at 5381 and applies h = h * 33 ^ byte. state is not read. */
+cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen)
+{
+    const unsigned char *bytes = (const unsigned char *)k;
+    cmph_uint32 acc = 5381;
+    cmph_uint32 pos;
+    for (pos = 0; pos < keylen; pos++)
+    {
+        acc = (acc * 33) ^ bytes[pos];
+    }
+    return acc;
+}
+
+
+/* Nothing is serialised for DJB2: *buf is set to NULL and *buflen to 0. state is not read. */
+void djb2_state_dump(djb2_state_t *state, char **buf, cmph_uint32 *buflen)
+{
+    *buflen = 0;
+    *buf = NULL;
+}
+
+djb2_state_t *djb2_state_copy(djb2_state_t *src_state) // duplicate of src_state, or NULL when malloc fails
+{
+    djb2_state_t *dest_state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
+    if (dest_state) dest_state->hashfunc = src_state->hashfunc; // never write through a failed allocation
+    return dest_state;
+}
+
+djb2_state_t *djb2_state_load(const char *buf, cmph_uint32 buflen) // buf and buflen are not read; returns NULL when malloc fails
+{
+    djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
+    if (state) state->hashfunc = CMPH_HASH_DJB2; // never write through a failed allocation
+    return state;
+}
diff --git a/girepository/cmph/djb2_hash.h b/girepository/cmph/djb2_hash.h
new file mode 100644
index 00000000..dda97e31
--- /dev/null
+++ b/girepository/cmph/djb2_hash.h
@@ -0,0 +1,18 @@
+#ifndef __DJB2_HASH_H__
+#define __DJB2_HASH_H__
+
+#include "hash.h"
+
+typedef struct __djb2_state_t
+{
+ CMPH_HASH hashfunc;
+} djb2_state_t;
+
+djb2_state_t *djb2_state_new();
+cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen);
+void djb2_state_dump(djb2_state_t *state, char **buf, cmph_uint32 *buflen);
+djb2_state_t *djb2_state_copy(djb2_state_t *src_state);
+djb2_state_t *djb2_state_load(const char *buf, cmph_uint32 buflen);
+void djb2_state_destroy(djb2_state_t *state);
+
+#endif
diff --git a/girepository/cmph/fch.c b/girepository/cmph/fch.c
new file mode 100644
index 00000000..67b68fbb
--- /dev/null
+++ b/girepository/cmph/fch.c
@@ -0,0 +1,517 @@
+#include "fch.h"
+#include "cmph_structs.h"
+#include "fch_structs.h"
+#include "hash.h"
+#include "bitbool.h"
+#include "fch_buckets.h"
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#define INDEX 0 /* alignment index within a bucket */
+//#define DEBUG
+#include "debug.h"
+
+static fch_buckets_t * mapping(cmph_config_t *mph);
+static cmph_uint32 * ordering(fch_buckets_t * buckets);
+static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes);
+static void permut(cmph_uint32 * vector, cmph_uint32 n);
+static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph_uint32 *sorted_indexes);
+
+/* Allocate an FCH configuration with Jenkins selected for both hash functions and every other field cleared. */
+fch_config_data_t *fch_config_new()
+{
+    fch_config_data_t *fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t));
+    assert(fch);
+    memset(fch, 0, sizeof(fch_config_data_t));
+    // explicit resets on top of the memset, so pointers and doubles do not rely on all-bits-zero
+    fch->g = NULL;
+    fch->h1 = fch->h2 = NULL;
+    fch->p1 = fch->p2 = fch->c = 0.0;
+    fch->b = fch->m = 0;
+    fch->hashfuncs[0] = CMPH_HASH_JENKINS;
+    fch->hashfuncs[1] = CMPH_HASH_JENKINS;
+    return fch;
+}
+
+/* Free the FCH configuration attached to mph->data. */
+/* Only the structure itself is released here; its g, h1 and h2 members are not touched. */
+void fch_config_destroy(cmph_config_t *mph)
+{
+    free(mph->data);
+}
+
+/* Copy up to two hash function ids from hashfuncs (a list terminated by CMPH_HASH_COUNT) into the FCH configuration. */
+void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    fch_config_data_t *fch = (fch_config_data_t *)mph->data;
+    cmph_uint32 i;
+    // fch only uses two hash functions; the bound is tested first so the list is never
+    // read beyond its second entry.
+    for (i = 0; i < 2 && hashfuncs[i] != CMPH_HASH_COUNT; ++i)
+    {
+        fch->hashfuncs[i] = hashfuncs[i];
+    }
+}
+
+/* Indexes below p1 are reduced modulo (cmph_uint32)p2; the others modulo b, and results below p2 are shifted up by (cmph_uint32)p2. */
+cmph_uint32 mixh10h11h12(cmph_uint32 b, double p1, double p2, cmph_uint32 initial_index)
+{
+    const cmph_uint32 int_p2 = (cmph_uint32)p2;
+    cmph_uint32 bucket;
+
+    if (initial_index < p1) return initial_index % int_p2; /* h11 o h10 */
+    bucket = initial_index % b; /* h12 o h10 */
+    return (bucket < p2) ? bucket + int_p2 : bucket;
+}
+
+
+cmph_uint32 fch_calc_b(double c, cmph_uint32 m) // b = ceil(c*m / (log2(m) + 1)); mapping() creates this many buckets
+{
+ return (cmph_uint32)ceil((c*m)/(log((double)m)/log(2.0) + 1));
+}
+
+double fch_calc_p1(cmph_uint32 m) // p1 = ceil(0.55*m); passed to mixh10h11h12 as its p1 threshold
+{
+ return ceil(0.55*m);
+}
+
+double fch_calc_p2(cmph_uint32 b) // p2 = ceil(0.3*b); passed to mixh10h11h12 as its p2 threshold
+{
+ return ceil(0.3*b);
+}
+
+static fch_buckets_t * mapping(cmph_config_t *mph) // mapping step: draws a fresh h1 and distributes every key into one of b buckets
+{
+ cmph_uint32 i = 0;
+ fch_buckets_t *buckets = NULL;
+ fch_config_data_t *fch = (fch_config_data_t *)mph->data;
+ if (fch->h1) hash_state_destroy(fch->h1); // drop the h1 of a previous attempt
+ fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
+ fch->b = fch_calc_b(fch->c, fch->m);
+ fch->p1 = fch_calc_p1(fch->m);
+ fch->p2 = fch_calc_p2(fch->b);
+ //DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2);
+ buckets = fch_buckets_new(fch->b);
+ // read the m keys from the start of the key source
+ mph->key_source->rewind(mph->key_source->data);
+ for(i = 0; i < fch->m; i++)
+ {
+ cmph_uint32 h1, keylen;
+ char *key = NULL;
+ mph->key_source->read(mph->key_source->data, &key, &keylen);
+ h1 = hash(fch->h1, key, keylen) % fch->m;
+ h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1); // bucket index for this key
+ fch_buckets_insert(buckets, h1, key, keylen);
+ key = NULL; // transfer memory ownership
+ // NOTE(review): the result of key_source->read is not checked
+ }
+ return buckets;
+}
+
+
+// Ordering step: returns the bucket indexes sorted by bucket size (delegates to fch_buckets_get_indexes_sorted_by_size); fch_new frees the result.
+static cmph_uint32 * ordering(fch_buckets_t * buckets)
+{
+ return fch_buckets_get_indexes_sorted_by_size(buckets);
+}
+
+/* Check whether function h2 causes collisions among the keys of each bucket */
+static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes)
+{
+    // seen[s] != 0 means some key of the bucket under test already hashed to slot s
+    cmph_uint8 *seen = (cmph_uint8 *)calloc((size_t)fch->m, sizeof(cmph_uint8));
+    const cmph_uint32 nbuckets = fch_buckets_get_nbuckets(buckets);
+    cmph_uint8 collided = 0;
+    cmph_uint32 bi, ki;
+
+    for (bi = 0; bi < nbuckets && !collided; bi++)
+    {
+        const cmph_uint32 bucket = sorted_indexes[bi];
+        const cmph_uint32 nkeys = fch_buckets_get_size(buckets, bucket);
+        memset(seen, 0, (size_t)fch->m); // each bucket starts from a clean table
+        for (ki = 0; ki < nkeys && !collided; ki++)
+        {
+            char *key = fch_buckets_get_key(buckets, bucket, ki);
+            cmph_uint32 keylen = fch_buckets_get_keylength(buckets, bucket, ki);
+            cmph_uint32 slot = hash(fch->h2, key, keylen) % fch->m;
+            if (seen[slot]) collided = 1; // collision detected
+            else seen[slot] = 1;
+        }
+    }
+    // 1 tells the caller that h2 sends two keys of one bucket to the same slot, 0 that it does not
+    free(seen);
+    return collided;
+}
+
+/* Shuffle vector in place: every position is swapped with a position drawn as rand() % n. */
+static void permut(cmph_uint32 * vector, cmph_uint32 n)
+{
+    for (cmph_uint32 pos = 0; pos < n; pos++) {
+        const cmph_uint32 other = (cmph_uint32) rand() % n;
+        const cmph_uint32 saved = vector[pos];
+        vector[pos] = vector[other];
+        vector[other] = saved;
+    }
+}
+
+static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph_uint32 *sorted_indexes) // searching step: returns 0 once every bucket has a g value, non-zero if it gave up
+{
+ cmph_uint32 * random_table = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32)); // random permutation of the slots 0..m-1
+ cmph_uint32 * map_table = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32)); // inverse: map_table[slot] = position of slot in random_table
+ cmph_uint32 iteration_to_generate_h2 = 0; // consecutive h2 candidates rejected by check_for_collisions_h2
+ cmph_uint32 searching_iterations = 0; // number of h2 candidates that reached the placement loop
+ cmph_uint8 restart = 0;
+ cmph_uint32 nbuckets = fch_buckets_get_nbuckets(buckets);
+ cmph_uint32 i, j, z, counter = 0, filled_count = 0; // random_table[0..filled_count) holds the slots already taken
+ if (fch->g) free (fch->g);
+ fch->g = (cmph_uint32 *) calloc((size_t)fch->b, sizeof(cmph_uint32));
+ // NOTE(review): none of the three calloc results above is checked for NULL
+ //DEBUGP("max bucket size: %u\n", fch_buckets_get_max_size(buckets));
+ // build the identity, shuffle it, then derive the inverse table
+ for(i = 0; i < fch->m; i++)
+ {
+ random_table[i] = i;
+ }
+ permut(random_table, fch->m);
+ for(i = 0; i < fch->m; i++)
+ {
+ map_table[random_table[i]] = i;
+ }
+ do {
+ if (fch->h2) hash_state_destroy(fch->h2);
+ fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m); // draw a new h2 candidate
+ restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
+ filled_count = 0; // every attempt starts with all slots free
+ if (!restart)
+ {
+ searching_iterations++; iteration_to_generate_h2 = 0;
+ //DEBUGP("searching_iterations: %u\n", searching_iterations);
+ }
+ else {
+ iteration_to_generate_h2++;
+ //DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
+ }
+ for(i = 0; (i < nbuckets) && !restart; i++) { // buckets are visited in the order given by sorted_indexes
+ cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]);
+ if (bucketsize == 0)
+ {
+ restart = 0; // false
+ break; // an empty bucket ends the placement loop; presumably all later buckets are empty too -- confirm sort order
+ }
+ else restart = 1; // true
+ for(z = 0; (z < (fch->m - filled_count)) && restart; z++) { // try each still-free slot as the target of the bucket's first key
+ char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX);
+ cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX);
+ cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
+ counter = 0; // slots claimed so far with the current g candidate
+ restart = 0; // false
+ fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m; // displacement that sends the first key to that slot
+ //DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]);
+ j = INDEX;
+ do { // check that every key of the bucket lands on a free slot with this g
+ cmph_uint32 index = 0;
+ key = fch_buckets_get_key(buckets, sorted_indexes[i], j);
+ keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], j);
+ h2 = hash(fch->h2, key, keylen) % fch->m;
+ index = (h2 + fch->g[sorted_indexes[i]]) % fch->m;
+ //DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize);
+ if (map_table[index] >= filled_count) { // slot is free: swap it into the taken prefix of random_table
+ cmph_uint32 y = map_table[index];
+ cmph_uint32 ry = random_table[y];
+ random_table[y] = random_table[filled_count];
+ random_table[filled_count] = ry;
+ map_table[random_table[y]] = y;
+ map_table[random_table[filled_count]] = filled_count;
+ filled_count++;
+ counter ++;
+ }
+ else { // slot already taken: release what this candidate claimed and try the next z
+ restart = 1; // true
+ filled_count = filled_count - counter;
+ counter = 0;
+ break;
+ }
+ j = (j + 1) % bucketsize;
+ } while(j % bucketsize != INDEX);
+ }
+ //getchar();
+ }
+ } while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000)); // give up after 10 placement attempts or 1000 consecutive rejected h2 candidates
+ free(map_table);
+ free(random_table);
+ return restart;
+}
+
+
+
+/* Build an FCH minimal perfect hash over the keys of mph->key_source. The mapping, ordering and searching */
+/* steps are retried up to 100 times. Returns a new cmph_t that takes ownership of g, h1 and h2, or NULL */
+/* when no attempt succeeds or memory runs out. Values of c not above 2 are replaced by 2.6. */
+cmph_t *fch_new(cmph_config_t *mph, double c)
+{
+    cmph_t *mphf = NULL;
+    fch_data_t *fchf = NULL;
+    cmph_uint32 iterations = 100;
+    cmph_uint8 restart_mapping = 0;
+    fch_buckets_t * buckets = NULL;
+    cmph_uint32 * sorted_indexes = NULL;
+    fch_config_data_t *fch = (fch_config_data_t *)mph->data;
+    fch->m = mph->key_source->nkeys;
+    //DEBUGP("m: %f\n", fch->m);
+    if (c <= 2) c = 2.6; // validating restrictions over parameter c.
+    fch->c = c;
+    //DEBUGP("c: %f\n", fch->c);
+    fch->h1 = NULL;
+    fch->h2 = NULL;
+    fch->g = NULL;
+
+    do
+    {
+        if (mph->verbosity) fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m);
+        if (buckets) fch_buckets_destroy(buckets);
+        buckets = mapping(mph);
+        if (mph->verbosity) fprintf(stderr, "Starting ordering step\n");
+        free(sorted_indexes);
+        sorted_indexes = ordering(buckets);
+        if (mph->verbosity) fprintf(stderr, "Starting searching step.\n");
+        restart_mapping = searching(fch, buckets, sorted_indexes);
+        iterations--;
+    } while(restart_mapping && iterations > 0);
+
+    if (buckets) fch_buckets_destroy(buckets);
+    free(sorted_indexes);
+    // Judge success by the outcome of the last search, not by the counter: an attempt that
+    // succeeds on the final allowed iteration also leaves iterations at 0.
+    if (restart_mapping) return NULL;
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    fchf = (fch_data_t *)malloc(sizeof(fch_data_t));
+    if (!mphf || !fchf)
+    {
+        free(mphf);
+        free(fchf);
+        return NULL;
+    }
+    mphf->algo = mph->algo;
+    fchf->g = fch->g;
+    fch->g = NULL; //transfer memory ownership
+    fchf->h1 = fch->h1;
+    fch->h1 = NULL; //transfer memory ownership
+    fchf->h2 = fch->h2;
+    fch->h2 = NULL; //transfer memory ownership
+    fchf->p2 = fch->p2;
+    fchf->p1 = fch->p1;
+    fchf->b = fch->b;
+    fchf->c = fch->c;
+    fchf->m = fch->m;
+    mphf->data = fchf;
+    mphf->size = fch->m;
+    //DEBUGP("Successfully generated minimal perfect hash\n");
+    if (mph->verbosity) fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+    return mphf;
+}
+
+int fch_dump(cmph_t *mphf, FILE *fd) // writes mphf to fd; always returns 1
+{
+ char *buf = NULL;
+ cmph_uint32 buflen;
+ register size_t nbytes; // receives every fwrite result but is never inspected
+ // NOTE(review): write failures go unnoticed because nbytes is never checked
+ fch_data_t *data = (fch_data_t *)mphf->data;
+ __cmph_dump(mphf, fd);
+ // h1 state, written as a cmph_uint32 length followed by that many bytes
+ hash_state_dump(data->h1, &buf, &buflen);
+ //DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+ nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
+ nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
+ free(buf);
+ // h2 state, same length-prefixed form
+ hash_state_dump(data->h2, &buf, &buflen);
+ //DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+ nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
+ nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
+ free(buf);
+ // scalar parameters in the order m, c, b, p1, p2, followed by the b entries of g; fch_load reads them back in this order
+ nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
+ nbytes = fwrite(&(data->c), sizeof(double), (size_t)1, fd);
+ nbytes = fwrite(&(data->b), sizeof(cmph_uint32), (size_t)1, fd);
+ nbytes = fwrite(&(data->p1), sizeof(double), (size_t)1, fd);
+ nbytes = fwrite(&(data->p2), sizeof(double), (size_t)1, fd);
+ nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->b), (size_t)1, fd);
+ #ifdef DEBUG
+ cmph_uint32 i;
+ fprintf(stderr, "G: ");
+ for (i = 0; i < data->b; ++i) fprintf(stderr, "%u ", data->g[i]);
+ fprintf(stderr, "\n");
+ #endif
+ return 1;
+}
+
+void fch_load(FILE *f, cmph_t *mphf) // reads the fields written by fch_dump from f into a new fch_data_t stored in mphf->data
+{
+ char *buf = NULL;
+ cmph_uint32 buflen;
+ register size_t nbytes; // receives every fread result but is never inspected
+ fch_data_t *fch = (fch_data_t *)malloc(sizeof(fch_data_t));
+ // NOTE(review): neither the malloc results nor the fread results in this function are checked
+ //DEBUGP("Loading fch mphf\n");
+ mphf->data = fch;
+ //DEBUGP("Reading h1\n");
+ fch->h1 = NULL;
+ nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+ //DEBUGP("Hash state of h1 has %u bytes\n", buflen);
+ buf = (char *)malloc((size_t)buflen);
+ nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+ fch->h1 = hash_state_load(buf, buflen);
+ free(buf);
+ // h2 follows in the same length-prefixed form
+ //DEBUGP("Loading fch mphf\n");
+ mphf->data = fch;
+ //DEBUGP("Reading h2\n");
+ fch->h2 = NULL;
+ nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
+ //DEBUGP("Hash state of h2 has %u bytes\n", buflen);
+ buf = (char *)malloc((size_t)buflen);
+ nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
+ fch->h2 = hash_state_load(buf, buflen);
+ free(buf);
+
+ // scalar parameters in the order m, c, b, p1, p2
+ //DEBUGP("Reading m and n\n");
+ nbytes = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(fch->c), sizeof(double), (size_t)1, f);
+ nbytes = fread(&(fch->b), sizeof(cmph_uint32), (size_t)1, f);
+ nbytes = fread(&(fch->p1), sizeof(double), (size_t)1, f);
+ nbytes = fread(&(fch->p2), sizeof(double), (size_t)1, f);
+ // the g table holds b entries
+ fch->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*fch->b);
+ nbytes = fread(fch->g, fch->b*sizeof(cmph_uint32), (size_t)1, f);
+ #ifdef DEBUG
+ cmph_uint32 i;
+ fprintf(stderr, "G: ");
+ for (i = 0; i < fch->b; ++i) fprintf(stderr, "%u ", fch->g[i]);
+ fprintf(stderr, "\n");
+ #endif
+ return;
+}
+
+/* Evaluate the FCH function on key: (h2(key) + g[mixh10h11h12(h1(key) mod m)]) mod m. */
+cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    const fch_data_t *fch = mphf->data;
+    const cmph_uint32 h10 = hash(fch->h1, key, keylen) % fch->m;
+    const cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
+    const cmph_uint32 bucket = mixh10h11h12 (fch->b, fch->p1, fch->p2, h10);
+    return (h2 + fch->g[bucket]) % fch->m;
+}
+void fch_destroy(cmph_t *mphf) // releases the g table, both hash states, the fch_data_t and finally mphf itself
+{
+ fch_data_t *data = (fch_data_t *)mphf->data;
+ free(data->g);
+ hash_state_destroy(data->h1);
+ hash_state_destroy(data->h2);
+ free(data);
+ free(mphf); // mphf must not be used after this call
+}
+
+/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void fch_pack(cmph_t *mphf, void *packed_mphf)
+{
+    fch_data_t *data = (fch_data_t *)mphf->data;
+    cmph_uint8 * ptr = packed_mphf;
+    CMPH_HASH h1_type = hash_get_type(data->h1);
+    CMPH_HASH h2_type = hash_get_type(data->h2);
+    cmph_uint32 word;
+    cmph_uint64 whole;
+    // Fields are stored with memcpy: ptr is a byte cursor with no alignment guarantee, so a store through a casted 32/64-bit pointer could be misaligned.
+    // packing h1 type
+    word = (cmph_uint32)h1_type;
+    memcpy(ptr, &word, sizeof(word));
+    ptr += sizeof(word);
+    // packing h1
+    hash_state_pack(data->h1, ptr);
+    ptr += hash_state_packed_size(h1_type);
+    // packing h2 type
+    word = (cmph_uint32)h2_type;
+    memcpy(ptr, &word, sizeof(word));
+    ptr += sizeof(word);
+    // packing h2
+    hash_state_pack(data->h2, ptr);
+    ptr += hash_state_packed_size(h2_type);
+    // packing m
+    word = data->m;
+    memcpy(ptr, &word, sizeof(word));
+    ptr += sizeof(data->m);
+    // packing b
+    word = data->b;
+    memcpy(ptr, &word, sizeof(word));
+    ptr += sizeof(data->b);
+    // packing p1 (its integer value, as a cmph_uint64)
+    whole = (cmph_uint64)data->p1;
+    memcpy(ptr, &whole, sizeof(whole));
+    ptr += sizeof(data->p1);
+    // packing p2 (its integer value, as a cmph_uint64)
+    whole = (cmph_uint64)data->p2;
+    memcpy(ptr, &whole, sizeof(whole));
+    ptr += sizeof(data->p2);
+    // packing g
+    memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
+}
+
+/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 fch_packed_size(cmph_t *mphf)
+{
+    const fch_data_t *data = (fch_data_t *)mphf->data;
+    size_t total = sizeof(CMPH_ALGO) + 4*sizeof(cmph_uint32) + 2*sizeof(double); // fixed-size part
+    total += hash_state_packed_size(hash_get_type(data->h1));
+    total += hash_state_packed_size(hash_get_type(data->h2));
+    total += sizeof(cmph_uint32)*(data->b); // g table
+    return (cmph_uint32)total;
+}
+
+
+/** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
+{
+    cmph_uint64 whole; // p1/p2 are fetched with memcpy: g_ptr is a cmph_uint32 pointer and need not be 8-byte aligned
+    register cmph_uint8 *h1_ptr = packed_mphf;
+    register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
+    h1_ptr += 4;
+
+    register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
+    register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
+    h2_ptr += 4;
+
+    register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
+    register cmph_uint32 m = *g_ptr++;
+    register cmph_uint32 b = *g_ptr++;
+
+    memcpy(&whole, g_ptr, sizeof(whole));
+    register double p1 = (double)whole;
+    g_ptr += 2;
+
+    memcpy(&whole, g_ptr, sizeof(whole));
+    register double p2 = (double)whole;
+    g_ptr += 2; // g_ptr now points at the g table
+
+    register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
+    register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
+    h1 = mixh10h11h12 (b, p1, p2, h1);
+    return (h2 + g_ptr[h1]) % m;
+}
+
diff --git a/girepository/cmph/fch.h b/girepository/cmph/fch.h
new file mode 100644
index 00000000..ec4f0f5b
--- /dev/null
+++ b/girepository/cmph/fch.h
@@ -0,0 +1,48 @@
+#ifndef __CMPH_FCH_H__
+#define __CMPH_FCH_H__
+
+#include "cmph.h"
+
+typedef struct __fch_data_t fch_data_t;
+typedef struct __fch_config_data_t fch_config_data_t;
+
+/* Parameters calculation */
+cmph_uint32 fch_calc_b(double c, cmph_uint32 m);
+double fch_calc_p1(cmph_uint32 m);
+double fch_calc_p2(cmph_uint32 b);
+cmph_uint32 mixh10h11h12(cmph_uint32 b, double p1, double p2, cmph_uint32 initial_index);
+
+fch_config_data_t *fch_config_new();
+void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+void fch_config_destroy(cmph_config_t *mph);
+cmph_t *fch_new(cmph_config_t *mph, double c);
+
+void fch_load(FILE *f, cmph_t *mphf);
+int fch_dump(cmph_t *mphf, FILE *f);
+void fch_destroy(cmph_t *mphf);
+cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+
+/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
+ * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
+ * \param mphf pointer to the resulting mphf
+ * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
+ */
+void fch_pack(cmph_t *mphf, void *packed_mphf);
+
+/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
+ * \brief Return the amount of space needed to pack mphf.
+ * \param mphf pointer to a mphf
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 fch_packed_size(cmph_t *mphf);
+
+/** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+ * \brief Use the packed mphf to do a search.
+ * \param packed_mphf pointer to the packed mphf
+ * \param key key to be hashed
+ * \param keylen key length in bytes
+ * \return The mphf value
+ */
+cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
+
+#endif
diff --git a/girepository/cmph/fch_buckets.c b/girepository/cmph/fch_buckets.c
new file mode 100644
index 00000000..24b98e67
--- /dev/null
+++ b/girepository/cmph/fch_buckets.c
@@ -0,0 +1,214 @@
+#include "vqueue.h"
+#include "fch_buckets.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+//#define DEBUG
+#include "debug.h"
+
+/* One key stored in a bucket. */
+typedef struct __fch_bucket_entry_t
+{
+ char * value; // key pointer; released with free() when the owning bucket is destroyed
+ cmph_uint32 length; // key length as supplied by the inserter
+} fch_bucket_entry_t;
+
+/* A growable array of keys. */
+typedef struct __fch_bucket_t
+{
+ fch_bucket_entry_t * entries; // heap array of capacity slots, NULL while empty
+ cmph_uint32 capacity, size; // allocated slots / slots in use
+} fch_bucket_t;
+
+
+
+/* Put a bucket into the empty state: no storage, no entries. */
+static void fch_bucket_new(fch_bucket_t *bucket)
+{
+    assert(bucket);
+    bucket->entries = NULL;
+    bucket->capacity = 0;
+    bucket->size = 0;
+}
+
+/* Free every key held by the bucket and then the entry array itself.
+ * The bucket structure is not freed. */
+static void fch_bucket_destroy(fch_bucket_t *bucket)
+{
+    cmph_uint32 remaining;
+    fch_bucket_entry_t *entry;
+    assert(bucket);
+    entry = bucket->entries;
+    for (remaining = bucket->size; remaining > 0; remaining--)
+    {
+        free(entry->value);
+        entry++;
+    }
+    free(bucket->entries);
+}
+
+
+/* Make sure the bucket can hold at least size entries. Capacity starts at
+ * capacity + 1 and doubles until it reaches size. */
+static void fch_bucket_reserve(fch_bucket_t *bucket, cmph_uint32 size)
+{
+    cmph_uint32 grown;
+    assert(bucket);
+    if (bucket->capacity >= size) return; // already large enough
+
+    DEBUGP("Increasing current capacity %u to %u\n", bucket->capacity, size);
+    for (grown = bucket->capacity + 1; grown < size; grown *= 2)
+    {
+        // keep doubling
+    }
+    bucket->entries = (fch_bucket_entry_t *)realloc(bucket->entries, sizeof(fch_bucket_entry_t)*grown);
+    assert(bucket->entries);
+    bucket->capacity = grown;
+    DEBUGP("Increased\n");
+}
+
+/* Append a key to the bucket, growing the entry array when needed.
+ * The pointer val is stored as is, not copied. */
+static void fch_bucket_insert(fch_bucket_t *bucket, char *val, cmph_uint32 val_length)
+{
+    fch_bucket_entry_t *slot;
+    assert(bucket);
+    fch_bucket_reserve(bucket, bucket->size + 1);
+    slot = bucket->entries + bucket->size;
+    slot->value = val;
+    slot->length = val_length;
+    bucket->size++;
+}
+
+
+/* Return 1 when the bucket holds no entries, 0 otherwise. */
+static cmph_uint8 fch_bucket_is_empty(fch_bucket_t *bucket)
+{
+    assert(bucket);
+    if (bucket->size == 0) return (cmph_uint8)1;
+    return (cmph_uint8)0;
+}
+
+/* Return the number of entries currently stored in the bucket. */
+static cmph_uint32 fch_bucket_size(fch_bucket_t *bucket)
+{
+    cmph_uint32 count;
+    assert(bucket);
+    count = bucket->size;
+    return count;
+}
+
+/* Return the key pointer stored at position index_key of the bucket. */
+static char * fch_bucket_get_key(fch_bucket_t *bucket, cmph_uint32 index_key)
+{
+    assert(bucket);
+    assert(index_key < bucket->size);
+    return bucket->entries[index_key].value;
+}
+
+/* Return the recorded length of the key at position index_key of the bucket. */
+static cmph_uint32 fch_bucket_get_length(fch_bucket_t *bucket, cmph_uint32 index_key)
+{
+    assert(bucket);
+    assert(index_key < bucket->size);
+    return bucket->entries[index_key].length;
+}
+
+/* Dump the keys of one bucket to stderr; index is only used in the heading.
+ * Keys are printed with %s, so each one is read up to its first NUL byte. */
+static void fch_bucket_print(fch_bucket_t * bucket, cmph_uint32 index)
+{
+    cmph_uint32 pos = 0;
+    assert(bucket);
+    fprintf(stderr, "Printing bucket %u ...\n", index);
+    while (pos < bucket->size)
+    {
+        fprintf(stderr, " key: %s\n", bucket->entries[pos].value);
+        pos++;
+    }
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+
+/* A fixed-size collection of buckets. */
+struct __fch_buckets_t
+{
+ fch_bucket_t * values; // array of nbuckets buckets
+ cmph_uint32 nbuckets, max_size; // bucket count / size of the fullest bucket seen by fch_buckets_insert
+
+};
+
+/** \brief Allocate a collection of nbuckets empty buckets.
+ * \param nbuckets number of buckets to create
+ * \return the new collection; release it with fch_buckets_destroy()
+ *
+ * Allocation failures are caught by assert().
+ */
+fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets)
+{
+    cmph_uint32 i;
+    fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t));
+    assert(buckets);
+    buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t));
+    // check the array before the loop below writes through it
+    assert(buckets->values);
+    for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
+    buckets->nbuckets = nbuckets;
+    buckets->max_size = 0;
+    return buckets;
+}
+
+/** \brief Tell whether the bucket at position index holds no keys.
+ * \return 1 when empty, 0 otherwise
+ */
+cmph_uint8 fch_buckets_is_empty(fch_buckets_t * buckets, cmph_uint32 index)
+{
+    fch_bucket_t *target;
+    assert(index < buckets->nbuckets);
+    target = buckets->values + index;
+    return fch_bucket_is_empty(target);
+}
+
+/** \brief Append a key to the bucket at position index and keep max_size up to date.
+ * \param key key pointer; stored as is and freed by fch_buckets_destroy()
+ * \param length key length
+ */
+void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, cmph_uint32 length)
+{
+    fch_bucket_t *target;
+    cmph_uint32 filled;
+    assert(index < buckets->nbuckets);
+    target = buckets->values + index;
+    fch_bucket_insert(target, key, length);
+    filled = fch_bucket_size(target);
+    if (filled > buckets->max_size) buckets->max_size = filled;
+}
+
+/** \brief Return how many keys the bucket at position index holds. */
+cmph_uint32 fch_buckets_get_size(fch_buckets_t * buckets, cmph_uint32 index)
+{
+    fch_bucket_t *target;
+    assert(index < buckets->nbuckets);
+    target = buckets->values + index;
+    return fch_bucket_size(target);
+}
+
+
+/** \brief Return the key stored at position index_key of bucket index.
+ * The returned pointer is still owned by the collection.
+ */
+char * fch_buckets_get_key(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key)
+{
+    fch_bucket_t *target;
+    assert(index < buckets->nbuckets);
+    target = buckets->values + index;
+    return fch_bucket_get_key(target, index_key);
+}
+
+/** \brief Return the recorded length of the key at position index_key of bucket index. */
+cmph_uint32 fch_buckets_get_keylength(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key)
+{
+    fch_bucket_t *target;
+    assert(index < buckets->nbuckets);
+    target = buckets->values + index;
+    return fch_bucket_get_length(target, index_key);
+}
+
+/** \brief Return the size of the fullest bucket, as tracked by fch_buckets_insert(). */
+cmph_uint32 fch_buckets_get_max_size(fch_buckets_t * buckets)
+{
+    cmph_uint32 largest = buckets->max_size;
+    return largest;
+}
+
+/** \brief Return the number of buckets in the collection. */
+cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets)
+{
+    cmph_uint32 count = buckets->nbuckets;
+    return count;
+}
+
+/** \brief Return the bucket indexes ordered by decreasing bucket size.
+ * \param buckets the bucket collection
+ * \return a newly allocated array of nbuckets indexes; the caller must free() it
+ *
+ * Counting sort keyed on bucket size. Buckets of equal size keep their
+ * original relative order. Allocation failures are caught by assert().
+ */
+cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
+{
+    cmph_uint32 i, size;
+    cmph_uint32 sum = 0, value;
+    cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32));
+    cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32));
+    assert(nbuckets_size);
+    // calloc may legitimately return NULL for a zero-element request
+    assert(sorted_indexes || buckets->nbuckets == 0);
+
+    // collect how many buckets there are for each size.
+    for (i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
+
+    // turn the counts into start offsets, largest size first.
+    value = nbuckets_size[buckets->max_size];
+    nbuckets_size[buckets->max_size] = sum;
+    for (size = buckets->max_size; size > 0; size--)
+    {
+        sum += value;
+        value = nbuckets_size[size - 1];
+        nbuckets_size[size - 1] = sum;
+    }
+
+    // drop every bucket index into the next free slot of its size class.
+    for (i = 0; i < buckets->nbuckets; i++)
+    {
+        cmph_uint32 bucket_size = fch_bucket_size(buckets->values + i);
+        sorted_indexes[nbuckets_size[bucket_size]] = i;
+        nbuckets_size[bucket_size] ++;
+    }
+    free(nbuckets_size);
+    return sorted_indexes;
+}
+
+/** \brief Dump every bucket of the collection to stderr, in index order. */
+void fch_buckets_print(fch_buckets_t * buckets)
+{
+    cmph_uint32 pos = 0;
+    while (pos < buckets->nbuckets)
+    {
+        fch_bucket_print(buckets->values + pos, pos);
+        pos++;
+    }
+}
+
+/** \brief Release the collection: every stored key, every bucket's entry
+ * array, the bucket array and the collection itself.
+ */
+void fch_buckets_destroy(fch_buckets_t * buckets)
+{
+    cmph_uint32 pos = 0;
+    while (pos < buckets->nbuckets)
+    {
+        fch_bucket_destroy(buckets->values + pos);
+        pos++;
+    }
+    free(buckets->values);
+    free(buckets);
+}
diff --git a/girepository/cmph/fch_buckets.h b/girepository/cmph/fch_buckets.h
new file mode 100644
index 00000000..2a1b8b2a
--- /dev/null
+++ b/girepository/cmph/fch_buckets.h
@@ -0,0 +1,30 @@
+#ifndef __CMPH_FCH_BUCKETS_H__
+#define __CMPH_FCH_BUCKETS_H__
+
+#include "cmph_types.h"
+typedef struct __fch_buckets_t fch_buckets_t;
+
+fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets);
+
+cmph_uint8 fch_buckets_is_empty(fch_buckets_t * buckets, cmph_uint32 index);
+
+void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, cmph_uint32 length);
+
+cmph_uint32 fch_buckets_get_size(fch_buckets_t * buckets, cmph_uint32 index);
+
+char * fch_buckets_get_key(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key);
+
+cmph_uint32 fch_buckets_get_keylength(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key);
+
+// returns the size of biggest bucket.
+cmph_uint32 fch_buckets_get_max_size(fch_buckets_t * buckets);
+
+// returns the number of buckets.
+cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets);
+
+cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets);
+
+void fch_buckets_print(fch_buckets_t * buckets);
+
+void fch_buckets_destroy(fch_buckets_t * buckets);
+#endif
diff --git a/girepository/cmph/fch_structs.h b/girepository/cmph/fch_structs.h
new file mode 100755
index 00000000..fcd1555e
--- /dev/null
+++ b/girepository/cmph/fch_structs.h
@@ -0,0 +1,30 @@
+#ifndef __CMPH_FCH_STRUCTS_H__
+#define __CMPH_FCH_STRUCTS_H__
+
+#include "hash_state.h"
+
+/* State of a built FCH function. fch_pack copies b entries of g, and
+ * fch_packed_size reads h1, h2 and b from this structure. */
+struct __fch_data_t
+{
+ cmph_uint32 m; // words count
+ double c; // constant c
+ cmph_uint32 b; // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Don't need to be stored
+ double p1; // constant p1 = ceil(0.6*m). Don't need to be stored
+ double p2; // constant p2 = ceil(0.3*b). Don't need to be stored
+ cmph_uint32 *g; // g function.
+ hash_state_t *h1; // h10 function.
+ hash_state_t *h2; // h20 function.
+};
+
+/* Configuration-time counterpart of __fch_data_t: the same fields plus the
+ * two hash function types selected for h1 and h2. */
+struct __fch_config_data_t
+{
+ CMPH_HASH hashfuncs[2];
+ cmph_uint32 m; // words count
+ double c; // constant c
+ cmph_uint32 b; // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Don't need to be stored
+ double p1; // constant p1 = ceil(0.6*m). Don't need to be stored
+ double p2; // constant p2 = ceil(0.3*b). Don't need to be stored
+ cmph_uint32 *g; // g function.
+ hash_state_t *h1; // h10 function.
+ hash_state_t *h2; // h20 function.
+};
+#endif
diff --git a/girepository/cmph/fnv_hash.c b/girepository/cmph/fnv_hash.c
new file mode 100644
index 00000000..aeaca8ff
--- /dev/null
+++ b/girepository/cmph/fnv_hash.c
@@ -0,0 +1,53 @@
+#include "fnv_hash.h"
+#include <stdlib.h>
+
+/* Allocate an FNV hash state tagged with CMPH_HASH_FNV.
+ * Returns NULL when memory cannot be allocated; release with fnv_state_destroy(). */
+fnv_state_t *fnv_state_new()
+{
+    fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
+    if (!state) return NULL;
+    state->hashfunc = CMPH_HASH_FNV;
+    return state;
+}
+
+/* Release a state obtained from fnv_state_new, fnv_state_copy or fnv_state_load. */
+void fnv_state_destroy(fnv_state_t *state)
+{
+    void *block = state;
+    free(block);
+}
+
+/* Hash keylen bytes starting at k with the FNV multiply-then-xor step,
+ * starting from 0. state is not consulted.
+ *
+ * The accumulator is an ordinary local, so equal keys always hash to the
+ * same value. It used to be static, which made every result depend on all
+ * earlier calls and made the function non-reentrant.
+ */
+cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen)
+{
+    const unsigned char *bp = (const unsigned char *)k;
+    const unsigned char *be = bp + keylen;
+    unsigned int hval = 0;
+
+    (void)state;
+    while (bp < be)
+    {
+        // same as hval *= 0x01000193, spelled with shifts and adds
+        hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24);
+
+        hval ^= *bp++;
+    }
+    return hval;
+}
+
+
+/* The FNV state has nothing to serialize: report an empty dump
+ * (NULL buffer, zero length). state is not consulted. */
+void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen)
+{
+    *buflen = 0;
+    *buf = NULL;
+}
+
+/* Duplicate an FNV state. Returns NULL when memory cannot be allocated;
+ * release the copy with fnv_state_destroy(). */
+fnv_state_t * fnv_state_copy(fnv_state_t *src_state)
+{
+    fnv_state_t *dest_state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
+    if (!dest_state) return NULL;
+    dest_state->hashfunc = src_state->hashfunc;
+    return dest_state;
+}
+
+/* Rebuild an FNV state from a dump. buf and buflen are ignored because
+ * fnv_state_dump writes nothing. Returns NULL when memory cannot be allocated. */
+fnv_state_t *fnv_state_load(const char *buf, cmph_uint32 buflen)
+{
+    fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
+    (void)buf;
+    (void)buflen;
+    if (!state) return NULL;
+    state->hashfunc = CMPH_HASH_FNV;
+    return state;
+}
diff --git a/girepository/cmph/fnv_hash.h b/girepository/cmph/fnv_hash.h
new file mode 100644
index 00000000..7f579465
--- /dev/null
+++ b/girepository/cmph/fnv_hash.h
@@ -0,0 +1,18 @@
+#ifndef __FNV_HASH_H__
+#define __FNV_HASH_H__
+
+#include "hash.h"
+
+/* FNV hash state: carries only the hash type tag. */
+typedef struct __fnv_state_t
+{
+ CMPH_HASH hashfunc; // set to CMPH_HASH_FNV by fnv_state_new and fnv_state_load
+} fnv_state_t;
+
+fnv_state_t *fnv_state_new();
+cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen);
+void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen);
+fnv_state_t *fnv_state_copy(fnv_state_t *src_state);
+fnv_state_t *fnv_state_load(const char *buf, cmph_uint32 buflen);
+void fnv_state_destroy(fnv_state_t *state);
+
+#endif
diff --git a/girepository/cmph/graph.c b/girepository/cmph/graph.c
new file mode 100644
index 00000000..c29fd8b9
--- /dev/null
+++ b/girepository/cmph/graph.c
@@ -0,0 +1,338 @@
+#include "graph.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <assert.h>
+#include <string.h>
+#include "vstack.h"
+#include "bitbool.h"
+
+//#define DEBUG
+#include "debug.h"
+
+/* static const cmph_uint8 bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */
+/* #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) */
+/* #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) */
+/* #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) */
+
+/* Map edge slot e to its slot in half i (0 or 1) of g->edges.
+ * Relies on a graph_t *g being in scope at the point of use. */
+#define abs_edge(e, i) ((e) % g->nedges + (i) * g->nedges)
+
+/* Undirected graph stored as per-vertex linked lists over a fixed pool of
+ * edge slots. Edge number e occupies slots e and e + nedges. */
+struct __graph_t
+{
+ cmph_uint32 nnodes; // number of vertices
+ cmph_uint32 nedges; // maximum number of edges
+ cmph_uint32 *edges; // 2*nedges slots: slot e holds one endpoint, slot e + nedges the other
+ cmph_uint32 *first; // per vertex: first slot of its adjacency list, or EMPTY
+ cmph_uint32 *next; // per slot: next slot in the same adjacency list, or EMPTY
+ cmph_uint8 *critical_nodes; /* included -- Fabiano*/
+ cmph_uint32 ncritical_nodes; /* included -- Fabiano*/
+ cmph_uint32 cedges; // number of edges added so far
+ int shrinking; // set once an edge has been deleted; graph_add_edge asserts it is 0
+};
+
+/* Sentinel stored in first/next/edges slots that hold no edge. */
+static const cmph_uint32 EMPTY = UINT_MAX;
+
+/* Create an empty graph with room for nnodes vertices and nedges edges.
+ * Returns NULL when any allocation fails; release with graph_destroy(). */
+graph_t *graph_new(cmph_uint32 nnodes, cmph_uint32 nedges)
+{
+    graph_t *graph = (graph_t *)malloc(sizeof(graph_t));
+    if (!graph) return NULL;
+
+    graph->edges = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * 2 * nedges);
+    graph->next = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * 2 * nedges);
+    graph->first = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * nnodes);
+    graph->critical_nodes = NULL; /* included -- Fabiano*/
+    graph->ncritical_nodes = 0; /* included -- Fabiano*/
+    graph->nnodes = nnodes;
+    graph->nedges = nedges;
+
+    // A NULL result is only an error for a non-empty request: malloc(0) may
+    // return NULL, and graph_clear_edges never touches a zero-length array.
+    if ((nedges > 0 && (!graph->edges || !graph->next)) ||
+        (nnodes > 0 && !graph->first))
+    {
+        free(graph->edges);
+        free(graph->next);
+        free(graph->first);
+        free(graph);
+        return NULL;
+    }
+
+    graph_clear_edges(graph);
+    return graph;
+}
+
+
+/* Release a graph and every array it owns. graph must not be NULL. */
+void graph_destroy(graph_t *graph)
+{
+    DEBUGP("Destroying graph\n");
+    free(graph->critical_nodes); /* included -- Fabiano*/
+    free(graph->next);
+    free(graph->first);
+    free(graph->edges);
+    free(graph);
+}
+
+/* Print to stdout, vertex by vertex, both endpoints of every edge found in
+ * that vertex's adjacency list. */
+void graph_print(graph_t *g)
+{
+    cmph_uint32 vertex, slot;
+    for (vertex = 0; vertex < g->nnodes; ++vertex)
+    {
+        DEBUGP("Printing edges connected to %u\n", vertex);
+        for (slot = g->first[vertex]; slot != EMPTY; slot = g->next[slot])
+        {
+            printf("%u -> %u\n", g->edges[abs_edge(slot, 0)], g->edges[abs_edge(slot, 1)]);
+        }
+    }
+}
+
+/* Add the undirected edge (v1, v2). The edge takes the next free number and
+ * is pushed onto the front of both endpoints' adjacency lists. Must not be
+ * called after an edge has been deleted, nor once nedges edges exist. */
+void graph_add_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
+{
+    cmph_uint32 fwd = g->cedges;
+    cmph_uint32 rev;
+
+    assert(v1 < g->nnodes);
+    assert(v2 < g->nnodes);
+    assert(fwd < g->nedges);
+    assert(!g->shrinking);
+
+    rev = fwd + g->nedges;
+
+    // slot in v1's list, pointing at v2
+    g->edges[fwd] = v2;
+    g->next[fwd] = g->first[v1];
+    g->first[v1] = fwd;
+
+    // twin slot in v2's list, pointing back at v1
+    g->edges[rev] = v1;
+    g->next[rev] = g->first[v2];
+    g->first[v2] = rev;
+
+    g->cedges++;
+}
+
+/* Return 1 when the edge owning slot e joins v1 and v2 (in either order), else 0. */
+static int check_edge(graph_t *g, cmph_uint32 e, cmph_uint32 v1, cmph_uint32 v2)
+{
+    cmph_uint32 end_a = g->edges[abs_edge(e, 0)];
+    cmph_uint32 end_b = g->edges[abs_edge(e, 1)];
+    DEBUGP("Checking edge %u %u looking for %u %u\n", end_a, end_b, v1, v2);
+    return (end_a == v1 && end_b == v2) || (end_a == v2 && end_b == v1);
+}
+
+/* Return the number (in [0, nedges)) of the first edge in v1's adjacency
+ * list that joins v1 and v2. The edge must exist; this is asserted. */
+cmph_uint32 graph_edge_id(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
+{
+    cmph_uint32 slot = g->first[v1];
+    assert(slot != EMPTY);
+    while (!check_edge(g, slot, v1, v2))
+    {
+        slot = g->next[slot];
+        assert(slot != EMPTY);
+    }
+    return abs_edge(slot, 0);
+}
+/* Unlink from v1's adjacency list the first slot whose edge joins v1 and v2.
+ * Only the list links change; the edges[] entries are left as they were.
+ * NOTE(review): an empty list (first[v1] == EMPTY) is not checked before
+ * the first check_edge call -- the edge is assumed to exist. */
+static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
+{
+ cmph_uint32 e, prev;
+
+ DEBUGP("Deleting edge point %u %u\n", v1, v2);
+ e = g->first[v1];
+ // Case 1: the match is the list head, so the head pointer moves on.
+ if (check_edge(g, e, v1, v2))
+ {
+ g->first[v1] = g->next[e];
+ //g->edges[e] = EMPTY;
+ DEBUGP("Deleted\n");
+ return;
+ }
+ DEBUGP("Checking linked list\n");
+ // Case 2: walk the list remembering the predecessor of the match.
+ do
+ {
+ prev = e;
+ e = g->next[e];
+ assert(e != EMPTY);
+ }
+ while (!check_edge(g, e, v1, v2));
+
+ g->next[prev] = g->next[e];
+ //g->edges[e] = EMPTY;
+ DEBUGP("Deleted\n");
+}
+
+
+/* Remove the undirected edge (v1, v2) from both endpoints' adjacency lists
+ * and mark the graph as shrinking, after which graph_add_edge asserts. */
+void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
+{
+    g->shrinking = 1;
+    del_edge_point(g, v1, v2); // entry in v1's list
+    del_edge_point(g, v2, v1); // entry in v2's list
+}
+
+/* Reset the graph to the edgeless state: every list head and every edge slot
+ * becomes EMPTY, and the edge counter and shrinking flag are cleared. */
+void graph_clear_edges(graph_t *g)
+{
+    cmph_uint32 vertex, slot;
+    cmph_uint32 nslots = g->nedges*2;
+
+    for (vertex = 0; vertex < g->nnodes; ++vertex)
+    {
+        g->first[vertex] = EMPTY;
+    }
+    for (slot = 0; slot < nslots; ++slot)
+    {
+        g->next[slot] = EMPTY;
+        g->edges[slot] = EMPTY;
+    }
+    g->shrinking = 0;
+    g->cedges = 0;
+}
+
+/* Return 1 when vertex v has exactly one incident edge whose bit is not set
+ * in deleted, storing that slot in *e; return 0 when it has none or more
+ * than one. *e may be overwritten even when 0 is returned. */
+static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted, cmph_uint32 *e)
+{
+ cmph_uint32 edge = g->first[v];
+ cmph_uint8 found = 0;
+ DEBUGP("Checking degree of vertex %u\n", v);
+ if (edge == EMPTY) return 0;
+ else if (!(GETBIT(deleted, abs_edge(edge, 0))))
+ {
+ found = 1;
+ *e = edge;
+ }
+ while(1)
+ {
+ edge = g->next[edge];
+ if (edge == EMPTY) break;
+ if (GETBIT(deleted, abs_edge(edge, 0))) continue;
+ // a second live edge means the degree is at least 2
+ if (found) return 0;
+ DEBUGP("Found first edge\n");
+ *e = edge;
+ found = 1;
+ }
+ return found;
+}
+
+/* Starting at vertex v, repeatedly mark the single remaining live edge of
+ * the current vertex as deleted and move to its other endpoint. Stops as
+ * soon as the current vertex does not have exactly one live edge. */
+static void cyclic_del_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted)
+{
+
+ cmph_uint32 e = 0;
+ cmph_uint8 degree1;
+ cmph_uint32 v1 = v;
+ cmph_uint32 v2 = 0;
+
+ degree1 = find_degree1_edge(g, v1, deleted, &e);
+ if (!degree1) return;
+ while(1)
+ {
+ DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
+ SETBIT(deleted, abs_edge(e, 0));
+
+ // pick whichever endpoint of the edge is not the current vertex
+ v2 = g->edges[abs_edge(e, 0)];
+ if (v2 == v1) v2 = g->edges[abs_edge(e, 1)];
+
+ DEBUGP("Checking if second endpoint %u has degree 1\n", v2);
+ degree1 = find_degree1_edge(g, v2, deleted, &e);
+ if (degree1)
+ {
+ DEBUGP("Inspecting vertex %u\n", v2);
+ v1 = v2;
+ }
+ else break;
+ }
+}
+
+/* Return 1 when the graph contains a cycle, 0 otherwise. Degree-1 edges are
+ * peeled away from every vertex; any edge left unmarked afterwards lies on
+ * a cycle. */
+int graph_is_cyclic(graph_t *g)
+{
+    cmph_uint32 edge, vertex;
+    int cyclic = 0;
+    size_t deleted_len = g->nedges/8 + 1; // one bit per edge
+    cmph_uint8 *deleted = (cmph_uint8 *)malloc(deleted_len);
+    memset(deleted, 0, deleted_len);
+
+    DEBUGP("Looking for cycles in graph with %u vertices and %u edges\n", g->nnodes, g->nedges);
+    for (vertex = 0; vertex < g->nnodes; ++vertex)
+    {
+        cyclic_del_edge(g, vertex, deleted);
+    }
+    for (edge = 0; edge < g->nedges && !cyclic; ++edge)
+    {
+        if (!(GETBIT(deleted, edge)))
+        {
+            DEBUGP("Edge %u %u->%u was not deleted\n", edge, g->edges[edge], g->edges[edge + g->nedges]);
+            cyclic = 1;
+        }
+    }
+    free(deleted);
+    return cyclic;
+}
+
+/* Return the bit of vertex v in the critical-node bitmap (non-zero when set).
+ * The bitmap is allocated by graph_obtain_critical_nodes and is NULL before
+ * that call. */
+cmph_uint8 graph_node_is_critical(graph_t * g, cmph_uint32 v) /* included -- Fabiano */
+{
+    cmph_uint8 *bitmap = g->critical_nodes;
+    return (cmph_uint8)GETBIT(bitmap,v);
+}
+
+/* Rebuild g->critical_nodes and g->ncritical_nodes. Degree-1 edges are
+ * peeled away from every vertex; both endpoints of each edge left unmarked
+ * are flagged as critical and counted once.
+ * NOTE(review): neither malloc result is checked before memset. */
+void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
+{
+ cmph_uint32 i;
+ cmph_uint32 v;
+ // one bit per edge
+ cmph_uint8 *deleted = (cmph_uint8 *)malloc((g->nedges*sizeof(cmph_uint8))/8+1);
+ size_t deleted_len = g->nedges/8 + 1;
+ memset(deleted, 0, deleted_len);
+ // drop the bitmap of any earlier run; one bit per vertex
+ free(g->critical_nodes);
+ g->critical_nodes = (cmph_uint8 *)malloc((g->nnodes*sizeof(cmph_uint8))/8 + 1);
+ g->ncritical_nodes = 0;
+ memset(g->critical_nodes, 0, (g->nnodes*sizeof(cmph_uint8))/8 + 1);
+ DEBUGP("Looking for the 2-core in graph with %u vertices and %u edges\n", g->nnodes, g->nedges);
+ for (v = 0; v < g->nnodes; ++v)
+ {
+ cyclic_del_edge(g, v, deleted);
+ }
+
+ for (i = 0; i < g->nedges; ++i)
+ {
+ if (!(GETBIT(deleted,i)))
+ {
+ DEBUGP("Edge %u %u->%u belongs to the 2-core\n", i, g->edges[i], g->edges[i + g->nedges]);
+ // testing the bit first keeps each vertex from being counted twice
+ if(!(GETBIT(g->critical_nodes,g->edges[i])))
+ {
+ g->ncritical_nodes ++;
+ SETBIT(g->critical_nodes,g->edges[i]);
+ }
+ if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges])))
+ {
+ g->ncritical_nodes ++;
+ SETBIT(g->critical_nodes,g->edges[i + g->nedges]);
+ }
+ }
+ }
+ free(deleted);
+}
+
+/* Return 1 when v1's adjacency list holds an edge joining v1 and v2, else 0. */
+cmph_uint8 graph_contains_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2) /* included -- Fabiano*/
+{
+    cmph_uint32 slot;
+    for (slot = g->first[v1]; slot != EMPTY; slot = g->next[slot])
+    {
+        if (check_edge(g, slot, v1, v2)) return 1;
+    }
+    return 0;
+}
+
+/* Return the endpoint stored for edge e in half id (0 or 1) of the edge array. */
+cmph_uint32 graph_vertex_id(graph_t *g, cmph_uint32 e, cmph_uint32 id) /* included -- Fabiano*/
+{
+    cmph_uint32 slot = e + id*g->nedges;
+    return g->edges[slot];
+}
+
+/* Return the critical-node count from the last graph_obtain_critical_nodes
+ * call (0 on a fresh graph). */
+cmph_uint32 graph_ncritical_nodes(graph_t *g) /* included -- Fabiano*/
+{
+    cmph_uint32 count = g->ncritical_nodes;
+    return count;
+}
+
+/* Return an iterator positioned at the head of v's adjacency list, for use
+ * with graph_next_neighbor(). */
+graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v)
+{
+    graph_iterator_t cursor;
+    cursor.edge = g->first[v];
+    cursor.vertex = v;
+    return cursor;
+}
+/* Return the next neighbor of it->vertex and advance the iterator, or
+ * GRAPH_NO_NEIGHBOR when the adjacency list is exhausted.
+ *
+ * When the current slot holds the iterating vertex itself, the other endpoint
+ * is read from the twin slot. The twin of a lower-half slot is edge + nedges
+ * and the twin of an upper-half slot is edge - nedges; always adding nedges
+ * would read past the 2*nedges-entry edge array for an upper-half slot. */
+cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
+{
+    cmph_uint32 ret;
+    cmph_uint32 twin;
+    if(it->edge == EMPTY) return GRAPH_NO_NEIGHBOR;
+    ret = g->edges[it->edge];
+    if (ret == it->vertex)
+    {
+        twin = (it->edge < g->nedges) ? it->edge + g->nedges : it->edge - g->nedges;
+        ret = g->edges[twin];
+    }
+    it->edge = g->next[it->edge];
+    return ret;
+}
+
+
diff --git a/girepository/cmph/graph.h b/girepository/cmph/graph.h
new file mode 100644
index 00000000..e1b5de6f
--- /dev/null
+++ b/girepository/cmph/graph.h
@@ -0,0 +1,40 @@
+#ifndef _CMPH_GRAPH_H__
+#define _CMPH_GRAPH_H__
+
+#include <limits.h>
+#include "cmph_types.h"
+
+#define GRAPH_NO_NEIGHBOR UINT_MAX
+
+typedef struct __graph_t graph_t;
+typedef struct __graph_iterator_t graph_iterator_t;
+/* Cursor over the neighbors of one vertex; see graph_neighbors_it() and
+ * graph_next_neighbor(). */
+struct __graph_iterator_t
+{
+ cmph_uint32 vertex; // vertex whose neighbors are being listed
+ cmph_uint32 edge; // current position in that vertex's adjacency list
+};
+
+
+
+graph_t *graph_new(cmph_uint32 nnodes, cmph_uint32 nedges);
+void graph_destroy(graph_t *graph);
+
+void graph_add_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
+void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
+void graph_clear_edges(graph_t *g);
+cmph_uint32 graph_edge_id(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
+cmph_uint8 graph_contains_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
+
+graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v);
+cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it);
+
+void graph_obtain_critical_nodes(graph_t *g); /* included -- Fabiano*/
+cmph_uint8 graph_node_is_critical(graph_t * g, cmph_uint32 v); /* included -- Fabiano */
+cmph_uint32 graph_ncritical_nodes(graph_t *g); /* included -- Fabiano*/
+cmph_uint32 graph_vertex_id(graph_t *g, cmph_uint32 e, cmph_uint32 id); /* included -- Fabiano*/
+
+int graph_is_cyclic(graph_t *g);
+
+void graph_print(graph_t *);
+
+#endif
diff --git a/girepository/cmph/hash.c b/girepository/cmph/hash.c
new file mode 100644
index 00000000..be86d6e7
--- /dev/null
+++ b/girepository/cmph/hash.c
@@ -0,0 +1,216 @@
+#include "hash_state.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+/* NULL-terminated table of hash function names, indexed by CMPH_HASH value.
+ * hash_state_dump writes the name as a prefix and hash_state_load matches it. */
+const char *cmph_hash_names[] = { "jenkins", NULL };
+
+/** \fn hash_state_t *hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize)
+ * \brief Create a hash state of the requested type.
+ * \param hashfunc hash function type
+ * \param hashsize size argument forwarded to the backend constructor
+ * \return the new state, or NULL when the backend returns NULL or, in
+ * NDEBUG builds, when hashfunc is not a supported type
+ */
+hash_state_t *hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize)
+{
+    hash_state_t *state = NULL;
+    switch (hashfunc)
+    {
+        case CMPH_HASH_JENKINS:
+            DEBUGP("Jenkins function - %u\n", hashsize);
+            state = (hash_state_t *)jenkins_state_new(hashsize);
+            DEBUGP("Jenkins function created\n");
+            break;
+        default:
+            assert(0);
+    }
+    // state stays NULL on an unknown type (NDEBUG) or a failed backend
+    if (state) state->hashfunc = hashfunc;
+    return state;
+}
+/* Hash key with the function selected by state->hashfunc.
+ * An unsupported type trips assert(0); NDEBUG builds return 0 instead. */
+cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen)
+{
+    if (state->hashfunc == CMPH_HASH_JENKINS)
+    {
+        return jenkins_hash((jenkins_state_t *)state, key, keylen);
+    }
+    assert(0);
+    return 0;
+}
+
+/* Fill hashes with the hash vector of key, using the function selected by
+ * state->hashfunc. An unsupported type trips assert(0). */
+void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+    if (state->hashfunc == CMPH_HASH_JENKINS)
+    {
+        jenkins_hash_vector_((jenkins_state_t *)state, key, keylen, hashes);
+        return;
+    }
+    assert(0);
+}
+
+
+/** \fn void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen)
+ * \brief Serialize a hash state as its NUL-terminated name followed by the backend dump.
+ * \param state hash state to serialize
+ * \param buf receives a malloc'ed buffer that the caller must free
+ * \param buflen receives the buffer length, or UINT_MAX on failure
+ *
+ * UINT_MAX is the failure sentinel already used for the backend dump. It is
+ * now also reported when the output buffer cannot be allocated or, in NDEBUG
+ * builds, when the hash type is unknown.
+ */
+void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen)
+{
+    char *algobuf = NULL;
+    size_t namelen;
+    size_t len;
+    switch (state->hashfunc)
+    {
+        case CMPH_HASH_JENKINS:
+            jenkins_state_dump((jenkins_state_t *)state, &algobuf, buflen);
+            if (*buflen == UINT_MAX) return;
+            break;
+        default:
+            assert(0);
+            // NDEBUG builds: report failure instead of using algobuf uninitialized
+            *buf = NULL;
+            *buflen = UINT_MAX;
+            return;
+    }
+    namelen = strlen(cmph_hash_names[state->hashfunc]) + 1; // includes the NUL
+    len = *buflen;
+    *buf = (char *)malloc(namelen + len);
+    if (!*buf)
+    {
+        free(algobuf);
+        *buflen = UINT_MAX;
+        return;
+    }
+    memcpy(*buf, cmph_hash_names[state->hashfunc], namelen);
+    DEBUGP("Algobuf is %u\n", *(cmph_uint32 *)algobuf);
+    memcpy(*buf + namelen, algobuf, len);
+    *buflen = (cmph_uint32)(namelen + len);
+    free(algobuf);
+}
+
+/** \fn hash_state_t * hash_state_copy(hash_state_t *src_state)
+ * \brief Duplicate a hash state.
+ * \param src_state state to copy
+ * \return the copy, or NULL when the backend copy returns NULL or, in NDEBUG
+ * builds, when the hash type is unknown
+ */
+hash_state_t * hash_state_copy(hash_state_t *src_state)
+{
+    hash_state_t *dest_state = NULL;
+    switch (src_state->hashfunc)
+    {
+        case CMPH_HASH_JENKINS:
+            dest_state = (hash_state_t *)jenkins_state_copy((jenkins_state_t *)src_state);
+            break;
+        default:
+            assert(0);
+    }
+    // dest_state stays NULL on an unknown type (NDEBUG) or a failed backend
+    if (dest_state) dest_state->hashfunc = src_state->hashfunc;
+    return dest_state;
+}
+
+/* Rebuild a hash state from a buffer written by hash_state_dump: a
+ * NUL-terminated function name followed by the backend-specific dump.
+ * Returns NULL when the name matches no entry of cmph_hash_names. */
+hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen)
+{
+ cmph_uint32 i;
+ cmph_uint32 offset;
+ CMPH_HASH hashfunc = CMPH_HASH_COUNT;
+ // NOTE(review): cmph_hash_names holds one name followed by NULL, so this
+ // loop is only safe if CMPH_HASH_COUNT does not exceed the number of
+ // names -- confirm. buf is also assumed to contain a NUL terminator.
+ for (i = 0; i < CMPH_HASH_COUNT; ++i)
+ {
+ if (strcmp(buf, cmph_hash_names[i]) == 0)
+ {
+ hashfunc = i;
+ break;
+ }
+ }
+ if (hashfunc == CMPH_HASH_COUNT) return NULL;
+ // skip the name and its terminator to reach the backend payload
+ offset = (cmph_uint32)strlen(cmph_hash_names[hashfunc]) + 1;
+ switch (hashfunc)
+ {
+ case CMPH_HASH_JENKINS:
+ // NOTE(review): buflen - offset wraps around when buflen < offset.
+ return (hash_state_t *)jenkins_state_load(buf + offset, buflen - offset);
+ default:
+ return NULL;
+ }
+ return NULL;
+}
+/* Release a hash state through the destructor of its backend.
+ * An unsupported type trips assert(0). */
+void hash_state_destroy(hash_state_t *state)
+{
+    if (state->hashfunc == CMPH_HASH_JENKINS)
+    {
+        jenkins_state_destroy((jenkins_state_t *)state);
+        return;
+    }
+    assert(0);
+}
+
+/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed)
+ * \brief Write a hash function into a preallocated contiguous memory area.
+ * \param state points to the hash function
+ * \param hash_packed destination area; it must be at least hash_state_packed_size() bytes long
+ *
+ * Only the function's state is written here; the hash function type must be
+ * packed separately by the caller.
+ */
+void hash_state_pack(hash_state_t *state, void *hash_packed)
+{
+    if (state->hashfunc == CMPH_HASH_JENKINS)
+    {
+        // pack the jenkins hash function
+        jenkins_state_pack((jenkins_state_t *)state, hash_packed);
+        return;
+    }
+    assert(0);
+}
+
+/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
+ * \brief Return the amount of space needed to pack a hash function.
+ * \param hashfunc function type
+ * \return the size of the packed function; an unsupported type trips
+ * assert(0) and yields zero in NDEBUG builds
+ */
+cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
+{
+    cmph_uint32 size = 0;
+    if (hashfunc == CMPH_HASH_JENKINS)
+    {
+        size += jenkins_state_packed_size();
+    }
+    else
+    {
+        assert(0);
+    }
+    return size;
+}
+
+/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
+ * \param hash_packed is a pointer to a contiguous memory area holding a packed hash function
+ * \param hashfunc is the type of the hash function packed in hash_packed
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
+{
+    if (hashfunc == CMPH_HASH_JENKINS)
+    {
+        return jenkins_hash_packed(hash_packed, k, keylen);
+    }
+    // unsupported type: abort in debug builds, 0 otherwise
+    assert(0);
+    return 0;
+}
+
+/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+ * \param hash_packed is a pointer to a contiguous memory area holding a packed hash function
+ * \param hashfunc is the type of the hash function packed in hash_packed
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+    if (hashfunc == CMPH_HASH_JENKINS)
+    {
+        jenkins_hash_vector_packed(hash_packed, k, keylen, hashes);
+        return;
+    }
+    assert(0);
+}
+
+
+/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
+ * \brief Read the type tag stored in a hash state.
+ * \param state is a pointer to a hash_state_t structure
+ * \return the hash function type pointed by state
+ */
+CMPH_HASH hash_get_type(hash_state_t *state)
+{
+    CMPH_HASH kind = state->hashfunc;
+    return kind;
+}
diff --git a/girepository/cmph/hash.h b/girepository/cmph/hash.h
new file mode 100644
index 00000000..0ec4ce1c
--- /dev/null
+++ b/girepository/cmph/hash.h
@@ -0,0 +1,76 @@
+#ifndef __CMPH_HASH_H__
+#define __CMPH_HASH_H__
+
+#include "cmph_types.h"
+
+typedef union __hash_state_t hash_state_t;
+
+hash_state_t *hash_state_new(CMPH_HASH, cmph_uint32 hashsize);
+
+/** \fn cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen);
+ * \param state is a pointer to a hash_state_t structure
+ * \param key is a pointer to a key
+ * \param keylen is the key length
+ * \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen);
+
+/** \fn void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes);
+ * \param state is a pointer to a hash_state_t structure
+ * \param key is a pointer to a key
+ * \param keylen is the key length
+ * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes);
+
+void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen);
+
+hash_state_t * hash_state_copy(hash_state_t *src_state);
+
+hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen);
+
+void hash_state_destroy(hash_state_t *state);
+
+/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
+ * \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
+ * \param state points to the hash function
+ * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
+ *
+ * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
+ * However, the hash function type must be packed outside.
+ */
+void hash_state_pack(hash_state_t *state, void *hash_packed);
+
+/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
+ * \param hash_packed is a pointer to a contiguous memory area
+ * \param hashfunc is the type of the hash function packed in hash_packed
+ * \param key is a pointer to a key
+ * \param keylen is the key length
+ * \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
+
+/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
+ * \brief Return the amount of space needed to pack a hash function.
+ * \param hashfunc function type
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc);
+
+
+/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+ * \param hash_packed is a pointer to a contiguous memory area
+ * \param hashfunc is the type of the hash function packed in hash_packed
+ * \param key is a pointer to a key
+ * \param keylen is the key length
+ * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+
+
+/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
+ * \param state is a pointer to a hash_state_t structure
+ * \return the hash function type pointed by state
+ */
+CMPH_HASH hash_get_type(hash_state_t *state);
+
+#endif
diff --git a/girepository/cmph/hash_state.h b/girepository/cmph/hash_state.h
new file mode 100644
index 00000000..1b567dca
--- /dev/null
+++ b/girepository/cmph/hash_state.h
@@ -0,0 +1,12 @@
+#ifndef __HASH_STATE_H__
+#define __HASH_STATE_H__
+
+#include "hash.h"
+#include "jenkins_hash.h"
+/* State of a hash function, stored as a union over the supported
+ * implementations (only jenkins here). jenkins_state_t itself begins with a
+ * CMPH_HASH field, so `hashfunc` overlays that leading field -- presumably so
+ * the function type can be read without knowing which member is active. */
+union __hash_state_t
+{
+ CMPH_HASH hashfunc;
+ jenkins_state_t jenkins;
+};
+
+#endif
diff --git a/girepository/cmph/hashtree.c b/girepository/cmph/hashtree.c
new file mode 100644
index 00000000..2f3567e5
--- /dev/null
+++ b/girepository/cmph/hashtree.c
@@ -0,0 +1,289 @@
+#include "graph.h"
+#include "hashtree.h"
+#include "cmph_structs.h"
+#include "hastree_structs.h"
+#include "hash.h"
+#include "bitbool.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+/* Allocate a hashtree configuration with its defaults: every field zeroed,
+ * all three hash slots set to CMPH_HASH_JENKINS and `memory` set to
+ * 32 * 1024 * 1024. Returns NULL when the allocation fails. */
+hashtree_config_data_t *hashtree_config_new()
+{
+    cmph_uint32 slot;
+    hashtree_config_data_t *cfg = (hashtree_config_data_t *)calloc(1, sizeof(hashtree_config_data_t));
+
+    if (cfg == NULL) return NULL;
+    for (slot = 0; slot < 3; ++slot)
+    {
+        cfg->hashfuncs[slot] = CMPH_HASH_JENKINS;
+    }
+    cfg->memory = 32 * 1024 * 1024;
+    return cfg;
+}
+/* Free the algorithm-specific configuration stored in mph->data.
+ * The cmph_config_t itself is not freed here. */
+void hashtree_config_destroy(cmph_config_t *mph)
+{
+    hashtree_config_data_t *cfg = (hashtree_config_data_t *)mph->data;
+
+    DEBUGP("Destroying algorithm dependent data\n");
+    free(cfg);
+}
+
+/* Copy hash function choices from `hashfuncs` into the configuration.
+ * `hashfuncs` is a list terminated by CMPH_HASH_COUNT; at most the first
+ * three entries are used because hashtree only has three hash slots. */
+void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
+{
+    hashtree_config_data_t *cfg = (hashtree_config_data_t *)mph->data;
+    cmph_uint32 slot;
+
+    /* the terminator test comes first, exactly as in the while-loop form */
+    for (slot = 0; hashfuncs[slot] != CMPH_HASH_COUNT && slot < 3; ++slot)
+    {
+        cfg->hashfuncs[slot] = hashfuncs[slot];
+    }
+}
+
+/* Build a minimal perfect hash function for the keys in mph->key_source.
+ *
+ * mph  configuration; mph->data must point to a hashtree_config_data_t.
+ * c    the graph gets ceil(c * nkeys) vertices.
+ *
+ * Mapping step: draws two hash functions and calls hashtree_gen_edges(),
+ * retrying with fresh hash states up to 20 times until it succeeds.
+ * Assignment step: fills g[] by traversing every unvisited vertex.
+ *
+ * Returns a newly allocated cmph_t that takes ownership of g and of the
+ * hash states, or NULL when no usable graph was found or an allocation
+ * failed. On failure everything allocated here is released and the matching
+ * configuration fields are reset to NULL. */
+cmph_t *hashtree_new(cmph_config_t *mph, double c)
+{
+    cmph_t *mphf = NULL;
+    hashtree_data_t *hashtreef = NULL;
+
+    cmph_uint32 i;
+    cmph_uint32 iterations = 20;
+    cmph_uint8 *visited = NULL;
+    hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
+    hashtree->m = mph->key_source->nkeys;
+    hashtree->n = ceil(c * mph->key_source->nkeys);
+    DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c);
+    hashtree->graph = graph_new(hashtree->n, hashtree->m);
+    DEBUGP("Created graph\n");
+
+    hashtree->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
+    if (!hashtree->hashes) goto fail;
+    for(i = 0; i < 3; ++i) hashtree->hashes[i] = NULL;
+    //Mapping step
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", hashtree->m, hashtree->n);
+    }
+    while(1)
+    {
+        int ok;
+        hashtree->hashes[0] = hash_state_new(hashtree->hashfuncs[0], hashtree->n);
+        hashtree->hashes[1] = hash_state_new(hashtree->hashfuncs[1], hashtree->n);
+        ok = hashtree_gen_edges(mph);
+        if (!ok)
+        {
+            --iterations;
+            hash_state_destroy(hashtree->hashes[0]);
+            hashtree->hashes[0] = NULL;
+            hash_state_destroy(hashtree->hashes[1]);
+            hashtree->hashes[1] = NULL;
+            DEBUGP("%u iterations remaining\n", iterations);
+            if (mph->verbosity)
+            {
+                fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
+            }
+            if (iterations == 0) break;
+        }
+        else break;
+    }
+    if (iterations == 0) goto fail;
+
+    //Assignment step
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Starting assignment step\n");
+    }
+    DEBUGP("Assignment step\n");
+    visited = (cmph_uint8 *)malloc(hashtree->n/8 + 1);
+    if (!visited) goto fail;
+    memset(visited, 0, hashtree->n/8 + 1);
+    free(hashtree->g);
+    hashtree->g = (cmph_uint32 *)malloc(hashtree->n * sizeof(cmph_uint32));
+    if (!hashtree->g) goto fail;
+
+    /* allocate the result objects before the traversal so that a failure
+     * cannot occur after ownership has started to move */
+    mphf = (cmph_t *)malloc(sizeof(cmph_t));
+    hashtreef = (hashtree_data_t *)malloc(sizeof(hashtree_data_t));
+    if (!mphf || !hashtreef) goto fail;
+
+    for (i = 0; i < hashtree->n; ++i)
+    {
+        if (!GETBIT(visited,i))
+        {
+            hashtree->g[i] = 0;
+            hashtree_traverse(hashtree, visited, i);
+        }
+    }
+    graph_destroy(hashtree->graph);
+    free(visited);
+    hashtree->graph = NULL;
+
+    mphf->algo = mph->algo;
+    hashtreef->g = hashtree->g;
+    hashtree->g = NULL; //transfer memory ownership
+    hashtreef->hashes = hashtree->hashes;
+    hashtree->hashes = NULL; //transfer memory ownership
+    hashtreef->n = hashtree->n;
+    hashtreef->m = hashtree->m;
+    mphf->data = hashtreef;
+    mphf->size = hashtree->m;
+    DEBUGP("Successfully generated minimal perfect hash\n");
+    if (mph->verbosity)
+    {
+        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
+    }
+    return mphf;
+
+fail:
+    /* release everything acquired above and leave no dangling pointers in
+     * the configuration */
+    free(hashtreef);
+    free(mphf);
+    free(visited);
+    free(hashtree->g);
+    hashtree->g = NULL;
+    if (hashtree->hashes)
+    {
+        for (i = 0; i < 3; ++i)
+        {
+            if (hashtree->hashes[i]) hash_state_destroy(hashtree->hashes[i]);
+        }
+        free(hashtree->hashes);
+        hashtree->hashes = NULL;
+    }
+    if (hashtree->graph)
+    {
+        graph_destroy(hashtree->graph);
+        hashtree->graph = NULL;
+    }
+    return NULL;
+}
+
+/* Depth-first assignment of g values starting at vertex v.
+ * Marks v as visited, then for every neighbor that is not yet visited sets
+ * g[neighbor] = edge_id(v, neighbor) - g[v] and recurses into it.
+ * g[v] must already be set by the caller. */
+static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visited, cmph_uint32 v)
+{
+    graph_iterator_t iter = graph_neighbors_it(hashtree->graph, v);
+    cmph_uint32 next = 0;
+
+    SETBIT(visited,v);
+    DEBUGP("Visiting vertex %u\n", v);
+
+    for (;;)
+    {
+        next = graph_next_neighbor(hashtree->graph, &iter);
+        if (next == GRAPH_NO_NEIGHBOR) break;
+
+        DEBUGP("Visiting neighbor %u\n", next);
+        if (!GETBIT(visited,next))
+        {
+            DEBUGP("Visiting neighbor %u\n", next);
+            DEBUGP("Visiting edge %u->%u with id %u\n", v, next, graph_edge_id(hashtree->graph, v, next));
+            hashtree->g[next] = graph_edge_id(hashtree->graph, v, next) - hashtree->g[v];
+            DEBUGP("g is %u (%u - %u mod %u)\n", hashtree->g[next], graph_edge_id(hashtree->graph, v, next), hashtree->g[v], hashtree->m);
+            hashtree_traverse(hashtree, visited, next);
+        }
+    }
+}
+
+/* Rebuild the graph from the key source using the two current hash states.
+ * Each key becomes an edge (h1, h2) with both ends reduced modulo n; when the
+ * two ends coincide, h2 is moved to the next vertex (wrapping to 0).
+ * Returns 1 when the resulting graph is acyclic, 0 when it is cyclic or a
+ * self loop could not be avoided. */
+static int hashtree_gen_edges(cmph_config_t *mph)
+{
+    hashtree_config_data_t *cfg = (hashtree_config_data_t *)mph->data;
+    cmph_uint32 idx;
+    int cyclic = 0;
+
+    DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", cfg->n, cmph_hash_names[cfg->hashfuncs[0]], cmph_hash_names[cfg->hashfuncs[1]]);
+    graph_clear_edges(cfg->graph);
+    mph->key_source->rewind(mph->key_source->data);
+    for (idx = 0; idx < mph->key_source->nkeys; ++idx)
+    {
+        char *key;
+        cmph_uint32 keylen;
+        cmph_uint32 h1, h2;
+
+        mph->key_source->read(mph->key_source->data, &key, &keylen);
+        h1 = hash(cfg->hashes[0], key, keylen) % cfg->n;
+        h2 = hash(cfg->hashes[1], key, keylen) % cfg->n;
+        if (h1 == h2 && ++h2 >= cfg->n) h2 = 0;
+        if (h1 == h2)
+        {
+            /* still equal after the shift, so the edge would be a self loop */
+            if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", idx);
+            mph->key_source->dispose(mph->key_source->data, key, keylen);
+            return 0;
+        }
+        DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
+        mph->key_source->dispose(mph->key_source->data, key, keylen);
+        graph_add_edge(cfg->graph, h1, h2);
+    }
+
+    cyclic = graph_is_cyclic(cfg->graph);
+    if (mph->verbosity && cyclic) fprintf(stderr, "Cyclic graph generated\n");
+    DEBUGP("Looking for cycles: %u\n", cyclic);
+
+    return cyclic ? 0 : 1;
+}
+
+/* Serialize the function to fd.
+ * Layout written after the common __cmph_dump() header:
+ *   number of hash functions (always 2),
+ *   for each of the two hash states: byte length, then the dumped state,
+ *   n, m, and finally the n entries of g.
+ * Always returns 1.
+ * NOTE(review): the results of fwrite() and hash_state_dump() are not
+ * checked, so a short write is not reported to the caller. */
+int hashtree_dump(cmph_t *mphf, FILE *fd)
+{
+    char *buf = NULL;
+    cmph_uint32 buflen;
+    cmph_uint32 two = 2; //number of hash functions
+    hashtree_data_t *data = (hashtree_data_t *)mphf->data;
+#ifdef DEBUG
+    cmph_uint32 i; /* index for the debug listing of g below */
+#endif
+    __cmph_dump(mphf, fd);
+
+    fwrite(&two, sizeof(cmph_uint32), 1, fd);
+    hash_state_dump(data->hashes[0], &buf, &buflen);
+    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+    fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
+    fwrite(buf, buflen, 1, fd);
+    free(buf);
+
+    hash_state_dump(data->hashes[1], &buf, &buflen);
+    DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
+    fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
+    fwrite(buf, buflen, 1, fd);
+    free(buf);
+
+    fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
+    fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
+
+    fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd);
+#ifdef DEBUG
+    fprintf(stderr, "G: ");
+    for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
+    fprintf(stderr, "\n");
+#endif
+    return 1;
+}
+
+/* Read a function previously written by hashtree_dump() from f and attach
+ * it to mphf->data. Reads the hash count, each length-prefixed hash state,
+ * then n, m and the n entries of g. The hashes array gets one extra slot
+ * holding a NULL terminator.
+ * NOTE(review): malloc() and fread() results are not checked here. */
+void hashtree_load(FILE *f, cmph_t *mphf)
+{
+    cmph_uint32 count;
+    cmph_uint32 idx;
+    hashtree_data_t *loaded = (hashtree_data_t *)malloc(sizeof(hashtree_data_t));
+
+    DEBUGP("Loading hashtree mphf\n");
+    mphf->data = loaded;
+    fread(&count, sizeof(cmph_uint32), 1, f);
+    loaded->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(count + 1));
+    loaded->hashes[count] = NULL;
+    DEBUGP("Reading %u hashes\n", count);
+
+    idx = 0;
+    while (idx < count)
+    {
+        cmph_uint32 blob_len;
+        char *blob = NULL;
+
+        fread(&blob_len, sizeof(cmph_uint32), 1, f);
+        DEBUGP("Hash state has %u bytes\n", blob_len);
+        blob = (char *)malloc(blob_len);
+        fread(blob, blob_len, 1, f);
+        loaded->hashes[idx] = hash_state_load(blob, blob_len);
+        free(blob);
+        ++idx;
+    }
+
+    DEBUGP("Reading m and n\n");
+    fread(&(loaded->n), sizeof(cmph_uint32), 1, f);
+    fread(&(loaded->m), sizeof(cmph_uint32), 1, f);
+
+    loaded->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*loaded->n);
+    fread(loaded->g, loaded->n*sizeof(cmph_uint32), 1, f);
+#ifdef DEBUG
+    fprintf(stderr, "G: ");
+    for (idx = 0; idx < loaded->n; ++idx) fprintf(stderr, "%u ", loaded->g[idx]);
+    fprintf(stderr, "\n");
+#endif
+}
+
+
+/* Evaluate the function for a key.
+ * Both hashes are reduced modulo n; if they coincide the second one is moved
+ * to the next vertex (wrapping to 0), mirroring edge generation.
+ * Returns (g[h1] + g[h2]) mod m. */
+cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
+{
+    hashtree_data_t *fn = mphf->data;
+    cmph_uint32 h1;
+    cmph_uint32 h2;
+
+    h1 = hash(fn->hashes[0], key, keylen) % fn->n;
+    h2 = hash(fn->hashes[1], key, keylen) % fn->n;
+    DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
+    if (h1 == h2)
+    {
+        ++h2;
+        if (h2 >= fn->n) h2 = 0;
+    }
+    DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, fn->g[h1], fn->g[h2], fn->m);
+    return (fn->g[h1] + fn->g[h2]) % fn->m;
+}
+/* Free a function object: the g array, the first two hash states, the
+ * hashes array, the algorithm data and finally mphf itself. */
+void hashtree_destroy(cmph_t *mphf)
+{
+    hashtree_data_t *priv = (hashtree_data_t *)mphf->data;
+    cmph_uint32 slot;
+
+    free(priv->g);
+    for (slot = 0; slot < 2; ++slot)
+    {
+        hash_state_destroy(priv->hashes[slot]);
+    }
+    free(priv->hashes);
+    free(priv);
+    free(mphf);
+}
diff --git a/girepository/cmph/hashtree.h b/girepository/cmph/hashtree.h
new file mode 100644
index 00000000..8bff6746
--- /dev/null
+++ b/girepository/cmph/hashtree.h
@@ -0,0 +1,19 @@
+#ifndef __CMPH_HASHTREE_H__
+#define __CMPH_HASHTREE_H__
+
+#include "cmph.h"
+
+/* Opaque handles; the layouts are declared in hashtree_structs.h. */
+typedef struct __hashtree_data_t hashtree_data_t;
+typedef struct __hashtree_config_data_t hashtree_config_data_t;
+
+/* Configuration lifecycle and tuning. */
+hashtree_config_data_t *hashtree_config_new();
+void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
+/* NOTE(review): no definition of hashtree_config_set_leaf_algo appears in hashtree.c -- confirm it exists elsewhere. */
+void hashtree_config_set_leaf_algo(cmph_config_t *mph, CMPH_ALGO leaf_algo);
+void hashtree_config_destroy(cmph_config_t *mph);
+/* Builds the function; returns NULL on failure. */
+cmph_t *hashtree_new(cmph_config_t *mph, double c);
+
+/* Persistence, teardown and lookup of a generated function. */
+void hashtree_load(FILE *f, cmph_t *mphf);
+int hashtree_dump(cmph_t *mphf, FILE *f);
+void hashtree_destroy(cmph_t *mphf);
+cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
+#endif
diff --git a/girepository/cmph/hashtree_structs.h b/girepository/cmph/hashtree_structs.h
new file mode 100644
index 00000000..7258cd39
--- /dev/null
+++ b/girepository/cmph/hashtree_structs.h
@@ -0,0 +1,32 @@
+#ifndef __CMPH_HASHTREE_STRUCTS_H__
+#define __CMPH_HASHTREE_STRUCTS_H__
+
+#include "hash_state.h"
+
+/* Data of a generated hashtree function.
+ * NOTE(review): hashtree.c accesses members `hashes` and `n` and indexes `g`
+ * as a flat cmph_uint32 array; none of that matches the members declared
+ * here -- confirm which side is current before relying on this layout. */
+struct __hashtree_data_t
+{
+ cmph_uint32 m; //edges (words) count
+ double c; //constant c
+ cmph_uint8 *size; //size[i] stores the number of edges represented by g[i]
+ cmph_uint32 **g;
+ cmph_uint32 k; //number of components
+ hash_state_t **h1;
+ hash_state_t **h2;
+ hash_state_t *h3;
+};
+
+/* Build-time configuration and working state for the hashtree algorithm.
+ * NOTE(review): hashtree.c also uses members `n`, `graph`, `hashes` and `g`
+ * on this type, which are not declared here -- confirm which side is
+ * current. */
+struct __hashtree_config_data_t
+{
+ CMPH_ALGO leaf_algo;
+ CMPH_HASH hashfuncs[3];
+ cmph_uint32 m; //edges (words) count
+ cmph_uint8 *size; //size[i] stores the number of edges represented by g[i]
+ cmph_uint32 *offset; //offset[i] stores the sum size[0] + ... size[i - 1]
+ cmph_uint32 k; //number of components
+ cmph_uint32 memory;
+ hash_state_t **h1;
+ hash_state_t **h2;
+ hash_state_t *h3;
+};
+
+#endif
diff --git a/girepository/cmph/jenkins_hash.c b/girepository/cmph/jenkins_hash.c
new file mode 100644
index 00000000..f5233a5a
--- /dev/null
+++ b/girepository/cmph/jenkins_hash.c
@@ -0,0 +1,297 @@
+#include "jenkins_hash.h"
+#include <stdlib.h>
+#ifdef WIN32
+#define _USE_MATH_DEFINES //For M_LOG2E
+#endif
+#include <math.h>
+#include <limits.h>
+#include <string.h>
+
+//#define DEBUG
+#include "debug.h"
+
+/* hashsize(n) is 2^n as a cmph_uint32; hashmask(n) is the mask selecting the low n bits. */
+#define hashsize(n) ((cmph_uint32)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+
+
+//#define NM2 /* Define this if you do not want power of 2 table sizes*/
+
+
+/*
+ --------------------------------------------------------------------
+ mix -- mix 3 32-bit values reversibly.
+ For every delta with one or two bits set, and the deltas of all three
+ high bits or all three low bits, whether the original value of a,b,c
+ is almost all zero or is uniformly distributed,
+ * If mix() is run forward or backward, at least 32 bits in a,b,c
+ have at least 1/4 probability of changing.
+ * If mix() is run forward, every bit of c will change between 1/3 and
+ 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ mix() was built out of 36 single-cycle latency instructions in a
+ structure that could support 2x parallelism, like so:
+ a -= b;
+ a -= c; x = (c>>13);
+ b -= c; a ^= x;
+ b -= a; x = (a<<8);
+ c -= a; b ^= x;
+ c -= b; x = (b>>13);
+ ...
+ Unfortunately, superscalar Pentiums and Sparcs can't take advantage
+ of that parallelism. They've also turned some of those single-cycle
+ latency instructions into multi-cycle latency instructions. Still,
+ this is the fastest good hash I could find. There were about 2^^68
+ to choose from. I only looked at a billion or so.
+
+ Usage note: the macro modifies all three arguments in place and expands
+ each of them many times without parentheses, so pass plain lvalues
+ without side effects. The expansion is a bare { } block, not a
+ do { } while (0) statement.
+ --------------------------------------------------------------------
+ */
+#define mix(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+ --------------------------------------------------------------------
+ hash() -- hash a variable-length key into a 32-bit value
+k : the key (the unaligned variable-length array of bytes)
+len : the length of the key, counting by bytes
+initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Every 1-bit and 2-bit delta achieves avalanche.
+About 6*len+35 instructions.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (cmph_uint8 **)k, do it like this:
+for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+ */
+/* Allocate a jenkins hash state with a random seed.
+ * size  size of the hash table; the seed is rand() reduced modulo size.
+ *       When size is 0 the rand() value is used unreduced, which avoids a
+ *       division by zero.
+ * Returns the new state (release it with jenkins_state_destroy()), or NULL
+ * when the allocation fails. */
+jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
+{
+    cmph_uint32 r;
+    jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+    if (!state) return NULL;
+    DEBUGP("Initializing jenkins hash\n");
+    /* tag the state like jenkins_state_load() does, so that
+     * jenkins_state_copy() never copies an indeterminate value */
+    state->hashfunc = CMPH_HASH_JENKINS;
+    r = (cmph_uint32)rand();
+    state->seed = (size != 0) ? (r % size) : r;
+    return state;
+}
+/* Release a state obtained from jenkins_state_new(), jenkins_state_copy()
+ * or jenkins_state_load(); the state is a single malloc'ed object. */
+void jenkins_state_destroy(jenkins_state_t *state)
+{
+ free(state);
+}
+
+
+/* Core of the jenkins hash: hashes keylen bytes of k with the given seed and
+ * leaves the three mixed 32-bit words in hashes[0..2]. hashes[2] is the value
+ * the scalar wrappers return.
+ *
+ * Declared static inline: the function is only used inside this file, and a
+ * plain `inline` definition does not provide an external definition in C99,
+ * which can leave calls unresolved at link time.
+ *
+ * NOTE: key bytes are read through `const char *`. Where char is signed,
+ * bytes >= 0x80 are sign-extended by the (cmph_uint32) casts, while the two
+ * (cmph_uint8) casts in the tail are not. The arithmetic is deliberately
+ * left untouched so that hash values stay identical to the original. */
+static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+    cmph_uint32 len = keylen;
+
+    /* Set up the internal state */
+    hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+    hashes[2] = seed; /* the previous hash value - seed in our case */
+
+    /*---------------------------------------- handle most of the key */
+    while (len >= 12)
+    {
+        hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
+        hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
+        hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
+        mix(hashes[0],hashes[1],hashes[2]);
+        k += 12; len -= 12;
+    }
+
+    /*------------------------------------- handle the last 11 bytes */
+    hashes[2] += keylen;
+    switch(len) /* all the case statements fall through */
+    {
+        case 11:
+            hashes[2] +=((cmph_uint32)k[10]<<24);
+            /* fall through */
+        case 10:
+            hashes[2] +=((cmph_uint32)k[9]<<16);
+            /* fall through */
+        case 9 :
+            hashes[2] +=((cmph_uint32)k[8]<<8);
+            /* the first byte of hashes[2] is reserved for the length */
+            /* fall through */
+        case 8 :
+            hashes[1] +=((cmph_uint32)k[7]<<24);
+            /* fall through */
+        case 7 :
+            hashes[1] +=((cmph_uint32)k[6]<<16);
+            /* fall through */
+        case 6 :
+            hashes[1] +=((cmph_uint32)k[5]<<8);
+            /* fall through */
+        case 5 :
+            hashes[1] +=(cmph_uint8) k[4];
+            /* fall through */
+        case 4 :
+            hashes[0] +=((cmph_uint32)k[3]<<24);
+            /* fall through */
+        case 3 :
+            hashes[0] +=((cmph_uint32)k[2]<<16);
+            /* fall through */
+        case 2 :
+            hashes[0] +=((cmph_uint32)k[1]<<8);
+            /* fall through */
+        case 1 :
+            hashes[0] +=(cmph_uint8)k[0];
+            /* case 0: nothing left to add */
+    }
+
+    mix(hashes[0],hashes[1],hashes[2]);
+}
+
+/* Hash keylen bytes of k with the seed held in state.
+ * Runs the shared vector routine and returns its third word. */
+cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
+{
+    cmph_uint32 words[3];
+
+    __jenkins_hash_vector(state->seed, k, keylen, words);
+    return words[2];
+}
+
+/* Vector form of jenkins_hash(): writes all three mixed words for key k into
+ * hashes[0..2], using the seed held in state. */
+void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+ __jenkins_hash_vector(state->seed, k, keylen, hashes);
+}
+
+/* Serialize a state into a freshly malloc'ed buffer.
+ * On success *buf holds the raw bytes of the seed and *buflen is
+ * sizeof(cmph_uint32); the caller frees *buf. If the allocation fails *buf
+ * is NULL and *buflen is set to UINT_MAX. */
+void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
+{
+    char *out = (char *)malloc(sizeof(cmph_uint32));
+
+    *buf = out;
+    if (out == NULL)
+    {
+        *buflen = UINT_MAX;
+        return;
+    }
+    *buflen = sizeof(cmph_uint32);
+    memcpy(out, &(state->seed), sizeof(cmph_uint32));
+    DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
+}
+
+/* Duplicate a state (hash function tag and seed) into a new allocation.
+ * Returns the copy, or NULL when the allocation fails. */
+jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state)
+{
+    jenkins_state_t *dest_state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+    if (!dest_state) return NULL;
+    dest_state->hashfunc = src_state->hashfunc;
+    dest_state->seed = src_state->seed;
+    return dest_state;
+}
+
+/* Rebuild a state from the bytes produced by jenkins_state_dump().
+ * buf     buffer whose first sizeof(cmph_uint32) bytes hold the seed.
+ * buflen  length of buf; not consulted by this function.
+ * Returns the new state tagged CMPH_HASH_JENKINS, or NULL when the
+ * allocation fails. */
+jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
+{
+    jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+    (void)buflen;
+    if (!state) return NULL;
+    /* buf is a byte buffer with no alignment guarantee: copy the seed out
+     * instead of dereferencing a casted pointer */
+    memcpy(&(state->seed), buf, sizeof(cmph_uint32));
+    state->hashfunc = CMPH_HASH_JENKINS;
+    DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
+    return state;
+}
+
+
+/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
+ * \brief Write the packed form of a jenkins function (its seed) into caller-provided memory.
+ * \param state points to the jenkins function
+ * \param jenkins_packed destination area; must hold at least jenkins_state_packed_size() bytes
+ *
+ * Does nothing when either pointer is NULL.
+ */
+void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
+{
+    if (state == NULL || jenkins_packed == NULL) return;
+    memcpy(jenkins_packed, &(state->seed), sizeof(state->seed));
+}
+
+/** \fn cmph_uint32 jenkins_state_packed_size();
+ * \brief Return the amount of space needed to pack a jenkins function.
+ * \return the size of the packed function, which is sizeof(cmph_uint32) (only the seed is packed)
+ */
+cmph_uint32 jenkins_state_packed_size()
+{
+ return sizeof(cmph_uint32);
+}
+
+
+/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
+ * \brief Hash a key using a jenkins function in packed form.
+ * \param jenkins_packed is a pointer to a contiguous memory area written by jenkins_state_pack()
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
+{
+    cmph_uint32 hashes[3];
+    cmph_uint32 seed;
+
+    /* the packer stores the seed with memcpy into an untyped area, so read it
+     * back the same way rather than through a casted pointer (no alignment
+     * or aliasing assumptions) */
+    memcpy(&seed, jenkins_packed, sizeof(seed));
+    __jenkins_hash_vector(seed, k, keylen, hashes);
+    return hashes[2];
+}
+
+/** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+ * \brief Vector form of jenkins_hash_packed(): writes all three mixed words into hashes.
+ * \param jenkins_packed is a pointer to a contiguous memory area written by jenkins_state_pack()
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
+{
+    cmph_uint32 seed;
+
+    /* read the seed the way jenkins_state_pack() wrote it: by byte copy, with
+     * no alignment or aliasing assumptions about the packed area */
+    memcpy(&seed, jenkins_packed, sizeof(seed));
+    __jenkins_hash_vector(seed, k, keylen, hashes);
+}
diff --git a/girepository/cmph/jenkins_hash.h b/girepository/cmph/jenkins_hash.h
new file mode 100644
index 00000000..8e8b9173
--- /dev/null
+++ b/girepository/cmph/jenkins_hash.h
@@ -0,0 +1,65 @@
+#ifndef __JEKINS_HASH_H__
+#define __JEKINS_HASH_H__
+
+#include "hash.h"
+
+/* State of a jenkins hash function: the function tag followed by the seed
+ * that is mixed into every hash computed with this state. */
+typedef struct __jenkins_state_t
+{
+ CMPH_HASH hashfunc;
+ cmph_uint32 seed;
+} jenkins_state_t;
+
+/* Allocates a state whose seed is rand() reduced modulo size. */
+jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table
+
+/** \fn cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
+ * \param state is a pointer to a jenkins_state_t structure
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
+
+/** \fn void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+ * \param state is a pointer to a jenkins_state_t structure
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+
+/* Serialization helpers.
+ * jenkins_state_dump stores the seed in a newly malloc'ed *buf and sets *buflen
+ * (UINT_MAX when the allocation fails). jenkins_state_load is the inverse and
+ * jenkins_state_copy duplicates a state; both return a new allocation.
+ * jenkins_state_destroy frees a state. */
+void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen);
+jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state);
+jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen);
+void jenkins_state_destroy(jenkins_state_t *state);
+
+/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
+ * \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
+ * \param state points to the jenkins function
+ * \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
+ */
+void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
+
+/** \fn cmph_uint32 jenkins_state_packed_size();
+ * \brief Return the amount of space needed to pack a jenkins function.
+ * \return the size of the packed function or zero for failures
+ */
+cmph_uint32 jenkins_state_packed_size();
+
+
+/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
+ * \param jenkins_packed is a pointer to a contiguous memory area
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \return an integer that represents a hash value of 32 bits.
+ */
+cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
+
+/** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+ * \param jenkins_packed is a pointer to a contiguous memory area
+ * \param k is a pointer to a key
+ * \param keylen is the key length
+ * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
+ */
+void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
+
+#endif
diff --git a/girepository/cmph/main.c b/girepository/cmph/main.c
new file mode 100644
index 00000000..f739b325
--- /dev/null
+++ b/girepository/cmph/main.c
@@ -0,0 +1,342 @@
+#ifdef WIN32
+#include "wingetopt.h"
+#else
+#include <getopt.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <assert.h>
+#include "cmph.h"
+#include "hash.h"
+
+#ifdef WIN32
+#define VERSION "0.8"
+#else
+#include "config.h"
+#endif
+
+
+/* Print the one-line command synopsis to stderr. prg is the program name to show. */
+void usage(const char *prg)
+{
+ fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
+}
+/* Print the full help text to stderr: the synopsis followed by a description
+ * of every option. The lists of valid algorithms and hash functions are
+ * generated from cmph_names and cmph_hash_names. prg is the program name. */
+void usage_long(const char *prg)
+{
+ cmph_uint32 i;
+ fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
+ fprintf(stderr, "Minimum perfect hashing tool\n\n");
+ fprintf(stderr, " -h\t print this help message\n");
+ fprintf(stderr, " -c\t c value determines:\n");
+ fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n");
+ fprintf(stderr, " \t * the number of bits per key required in the FCH algorithm\n");
+ fprintf(stderr, " \t * the load factor in the CHD_PH algorithm\n");
+ fprintf(stderr, " -a\t algorithm - valid values are\n");
+ for (i = 0; i < CMPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_names[i]);
+ fprintf(stderr, " -f\t hash function (may be used multiple times) - valid values are\n");
+ for (i = 0; i < CMPH_HASH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_hash_names[i]);
+ fprintf(stderr, " -V\t print version number and exit\n");
+ fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
+ fprintf(stderr, " -k\t number of keys\n");
+ fprintf(stderr, " -g\t generation mode\n");
+ fprintf(stderr, " -s\t random seed\n");
+ fprintf(stderr, " -m\t minimum perfect hash function file \n");
+ fprintf(stderr, " -M\t main memory availability (in MB) used in BRZ algorithm \n");
+ fprintf(stderr, " -d\t temporary directory used in BRZ algorithm \n");
+ fprintf(stderr, " -b\t the meaning of this parameter depends on the algorithm selected in the -a option:\n");
+ fprintf(stderr, " \t * For BRZ it is used to make the maximal number of keys in a bucket lower than 256.\n");
+ fprintf(stderr, " \t In this case its value should be an integer in the range [64,175]. Default is 128.\n\n");
+ fprintf(stderr, " \t * For BDZ it is used to determine the size of some precomputed rank\n");
+ fprintf(stderr, " \t information and its value should be an integer in the range [3,10]. Default\n");
+ fprintf(stderr, " \t is 7. The larger is this value, the more compact are the resulting functions\n");
+ fprintf(stderr, " \t and the slower are them at evaluation time.\n\n");
+ fprintf(stderr, " \t * For CHD and CHD_PH it is used to set the average number of keys per bucket\n");
+ fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n");
+ fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n");
+ fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n");
+ fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
+ fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n");
+ fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n");
+ fprintf(stderr, " \t range [1,128]. Defaul is 1\n");
+ fprintf(stderr, " keysfile\t line separated file with keys\n");
+}
+
+/* Command-line driver.
+ * With -g it builds a minimal perfect hash function for the keys in keysfile
+ * and writes it to the .mph file (default: keysfile + ".mph"). Without -g it
+ * loads the .mph file and checks every key of keysfile against it.
+ * Returns 0 on success, 1 for a usage error or when the check finds an
+ * unknown or over-full bin, and -1 for other failures. */
+int main(int argc, char **argv)
+{
+    cmph_uint32 verbosity = 0;
+    char generate = 0;
+    char *mphf_file = NULL;
+    FILE *mphf_fd = stdout;
+    const char *keys_file = NULL;
+    FILE *keys_fd;
+    cmph_uint32 nkeys = UINT_MAX;
+    cmph_uint32 seed = UINT_MAX;
+    CMPH_HASH *hashes = NULL;
+    cmph_uint32 nhashes = 0;
+    cmph_uint32 i;
+    CMPH_ALGO mph_algo = CMPH_CHM;
+    double c = 0;
+    cmph_config_t *config = NULL;
+    cmph_t *mphf = NULL;
+    char * tmp_dir = NULL;
+    cmph_io_adapter_t *source;
+    cmph_uint32 memory_availability = 0;
+    cmph_uint32 b = 0;
+    cmph_uint32 keys_per_bin = 1;
+    int ret = 0;
+    while (1)
+    {
+        /* getopt() returns int; keeping it in a plain char makes the -1
+         * test fail on platforms where char is unsigned */
+        int ch = getopt(argc, argv, "hVvgc:k:a:M:b:t:f:m:d:s:");
+        if (ch == -1) break;
+        switch (ch)
+        {
+            case 's':
+                {
+                    char *cptr;
+                    seed = (cmph_uint32)strtoul(optarg, &cptr, 10);
+                    if(*cptr != 0) {
+                        fprintf(stderr, "Invalid seed %s\n", optarg);
+                        exit(1);
+                    }
+                }
+                break;
+            case 'c':
+                {
+                    char *endptr;
+                    c = strtod(optarg, &endptr);
+                    if(*endptr != 0) {
+                        fprintf(stderr, "Invalid c value %s\n", optarg);
+                        exit(1);
+                    }
+                }
+                break;
+            case 'g':
+                generate = 1;
+                break;
+            case 'k':
+                {
+                    char *endptr;
+                    nkeys = (cmph_uint32)strtoul(optarg, &endptr, 10);
+                    if(*endptr != 0) {
+                        fprintf(stderr, "Invalid number of keys %s\n", optarg);
+                        exit(1);
+                    }
+                }
+                break;
+            case 'm':
+                mphf_file = strdup(optarg);
+                break;
+            case 'd':
+                tmp_dir = strdup(optarg);
+                break;
+            case 'M':
+                {
+                    char *cptr;
+                    memory_availability = (cmph_uint32)strtoul(optarg, &cptr, 10);
+                    if(*cptr != 0) {
+                        fprintf(stderr, "Invalid memory availability %s\n", optarg);
+                        exit(1);
+                    }
+                }
+                break;
+            case 'b':
+                {
+                    char *cptr;
+                    b = (cmph_uint32)strtoul(optarg, &cptr, 10);
+                    if(*cptr != 0) {
+                        fprintf(stderr, "Parameter b was not found: %s\n", optarg);
+                        exit(1);
+                    }
+                }
+                break;
+            case 't':
+                {
+                    char *cptr;
+                    keys_per_bin = (cmph_uint32)strtoul(optarg, &cptr, 10);
+                    if(*cptr != 0) {
+                        fprintf(stderr, "Parameter t was not found: %s\n", optarg);
+                        exit(1);
+                    }
+                }
+                break;
+            case 'v':
+                ++verbosity;
+                break;
+            case 'V':
+                printf("%s\n", VERSION);
+                return 0;
+            case 'h':
+                usage_long(argv[0]);
+                return 0;
+            case 'a':
+                {
+                    char valid = 0;
+                    for (i = 0; i < CMPH_COUNT; ++i)
+                    {
+                        if (strcmp(cmph_names[i], optarg) == 0)
+                        {
+                            mph_algo = (CMPH_ALGO)i;
+                            valid = 1;
+                            break;
+                        }
+                    }
+                    if (!valid)
+                    {
+                        fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
+                        return -1;
+                    }
+                }
+                break;
+            case 'f':
+                {
+                    char valid = 0;
+                    for (i = 0; i < CMPH_HASH_COUNT; ++i)
+                    {
+                        if (strcmp(cmph_hash_names[i], optarg) == 0)
+                        {
+                            /* grow through a temporary so the list is not lost if realloc fails */
+                            CMPH_HASH *grown = (CMPH_HASH *)realloc(hashes, sizeof(CMPH_HASH) * ( nhashes + 2 ));
+                            if (grown == NULL)
+                            {
+                                fprintf(stderr, "Unable to allocate memory for the hash function list\n");
+                                free(hashes);
+                                return -1;
+                            }
+                            hashes = grown;
+                            hashes[nhashes] = (CMPH_HASH)i;
+                            hashes[nhashes + 1] = CMPH_HASH_COUNT; /* list terminator */
+                            ++nhashes;
+                            valid = 1;
+                            break;
+                        }
+                    }
+                    if (!valid)
+                    {
+                        fprintf(stderr, "Invalid hash function: %s\n", optarg);
+                        return -1;
+                    }
+                }
+                break;
+            default:
+                usage(argv[0]);
+                return 1;
+        }
+    }
+
+    if (optind != argc - 1)
+    {
+        usage(argv[0]);
+        return 1;
+    }
+    keys_file = argv[optind];
+
+    if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
+    srand(seed);
+    if (mphf_file == NULL)
+    {
+        mphf_file = (char *)malloc(strlen(keys_file) + 5);
+        memcpy(mphf_file, keys_file, strlen(keys_file));
+        memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
+    }
+
+    keys_fd = fopen(keys_file, "r");
+
+    if (keys_fd == NULL)
+    {
+        fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
+        return -1;
+    }
+
+    if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
+    else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
+    if (generate)
+    {
+        //Create mphf
+        /* the .mph file is binary data: open it in binary mode, and check
+         * the stream before it is handed to the library */
+        mphf_fd = fopen(mphf_file, "wb");
+        if (mphf_fd == NULL)
+        {
+            fprintf(stderr, "Unable to open output file %s: %s\n", mphf_file, strerror(errno));
+            free(mphf_file);
+            return -1;
+        }
+        config = cmph_config_new(source);
+        cmph_config_set_algo(config, mph_algo);
+        if (nhashes) cmph_config_set_hashfuncs(config, hashes);
+        cmph_config_set_verbosity(config, verbosity);
+        cmph_config_set_tmp_dir(config, (cmph_uint8 *) tmp_dir);
+        cmph_config_set_mphf_fd(config, mphf_fd);
+        cmph_config_set_memory_availability(config, memory_availability);
+        cmph_config_set_b(config, b);
+        cmph_config_set_keys_per_bin(config, keys_per_bin);
+
+        //if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
+        if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
+        if (c != 0) cmph_config_set_graphsize(config, c);
+        mphf = cmph_new(config);
+
+        cmph_config_destroy(config);
+        if (mphf == NULL)
+        {
+            fprintf(stderr, "Unable to create minimum perfect hashing function\n");
+            //cmph_config_destroy(config);
+            free(mphf_file);
+            return -1;
+        }
+
+        cmph_dump(mphf, mphf_fd);
+        cmph_destroy(mphf);
+        fclose(mphf_fd);
+    }
+    else
+    {
+        cmph_uint8 * hashtable = NULL;
+        cmph_uint32 siz;
+        mphf_fd = fopen(mphf_file, "rb");
+        if (mphf_fd == NULL)
+        {
+            fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
+            free(mphf_file);
+            return -1;
+        }
+        mphf = cmph_load(mphf_fd);
+        fclose(mphf_fd);
+        if (!mphf)
+        {
+            fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
+            free(mphf_file);
+            return -1;
+        }
+        siz = cmph_size(mphf);
+        /* one counter per bin; calloc already returns zeroed memory */
+        hashtable = (cmph_uint8*)calloc(siz, sizeof(cmph_uint8));
+        if (hashtable == NULL)
+        {
+            fprintf(stderr, "Unable to allocate memory for the verification table\n");
+            cmph_destroy(mphf);
+            free(mphf_file);
+            return -1;
+        }
+        //check all keys
+        for (i = 0; i < source->nkeys; ++i)
+        {
+            cmph_uint32 h;
+            char *buf;
+            cmph_uint32 buflen = 0;
+            source->read(source->data, &buf, &buflen);
+            h = cmph_search(mphf, buf, buflen);
+            if (!(h < siz))
+            {
+                fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
+                ret = 1;
+            } else if(hashtable[h] >= keys_per_bin)
+            {
+                fprintf(stderr, "More than %u keys were mapped to bin %u\n", keys_per_bin, h);
+                fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
+                ret = 1;
+            } else hashtable[h]++;
+
+            if (verbosity)
+            {
+                printf("%s -> %u\n", buf, h);
+            }
+            source->dispose(source->data, buf, buflen);
+        }
+
+        cmph_destroy(mphf);
+        free(hashtable);
+    }
+    fclose(keys_fd);
+    free(mphf_file);
+    free(tmp_dir);
+    free(hashes);
+    cmph_io_nlfile_adapter_destroy(source);
+    return ret;
+
+}
diff --git a/girepository/cmph/miller_rabin.c b/girepository/cmph/miller_rabin.c
new file mode 100644
index 00000000..17d0ed34
--- /dev/null
+++ b/girepository/cmph/miller_rabin.c
@@ -0,0 +1,67 @@
+#include "miller_rabin.h"
+
+/* Modular exponentiation by binary square-and-multiply: returns (a^d) mod n.
+ * Each product is formed in 64 bits and reduced modulo n right away. */
+static inline cmph_uint64 int_pow(cmph_uint64 a, cmph_uint64 d, cmph_uint64 n)
+{
+    cmph_uint64 square = a;   /* a^(2^k) for the exponent bit being examined */
+    cmph_uint64 result = 1;
+    for(; d != 0; d >>= 1)
+    {
+        if(d & 1)
+            result = (result * square) % n;
+        square = (square * square) % n;
+    }
+    return result;
+}
+
+/* One Miller-Rabin round. a_exp_d is a^d mod n, where n - 1 = 2^s * d.
+ * Returns 1 when the witness does not prove n composite (a^d is 1, or one of
+ * a^d, a^(2d), ..., a^(2^(s-1) d) equals n - 1), and 0 otherwise. */
+static inline cmph_uint8 check_witness(cmph_uint64 a_exp_d, cmph_uint64 n, cmph_uint64 s)
+{
+    const cmph_uint64 minus_one = n - 1;
+    cmph_uint64 x = a_exp_d;
+    cmph_uint64 round = 1;
+
+    if(x == 1)
+        return 1;
+    while(x != minus_one)
+    {
+        if(round >= s)
+            return 0;          /* all s - 1 squarings used up */
+        x = (x * x) % n;
+        round++;
+    }
+    return 1;
+}
+
+/**
+ * Miller-Rabin primality test with the fixed witnesses 2, 7 and 61.
+ *
+ * This witness set is known to classify every n below 4,759,123,141
+ * correctly, which covers the whole 32-bit range. For larger n the 64-bit
+ * products formed in int_pow()/check_witness() can overflow, so the result
+ * is only meaningful for values that fit in 32 bits.
+ *
+ * \param n number to test
+ * \return 1 if n is prime, 0 otherwise (0 and 1 are reported as not prime)
+ */
+cmph_uint8 check_primality(cmph_uint64 n)
+{
+    static const cmph_uint64 small_primes[4] = {2, 3, 5, 7};
+    static const cmph_uint64 witnesses[3] = {2, 7, 61};
+    cmph_uint64 d, s;
+    unsigned int k;
+
+    /* 0 and 1 are not prime; without this guard n == 1 gives d == 0 below
+     * and the decomposition loop never terminates. */
+    if(n < 2)
+        return 0;
+
+    /* Trial division. A small prime divides itself, so it must be accepted
+     * before the divisibility test rejects it. */
+    for(k = 0; k < 4; k++)
+    {
+        if(n == small_primes[k])
+            return 1;
+        if((n % small_primes[k]) == 0)
+            return 0;
+    }
+
+    /* Decompose n - 1 into 2^s * d with d odd. n is odd and > 7 here, so
+     * n - 1 is even and non-zero and the loop terminates. */
+    s = 0;
+    d = n - 1;
+    do
+    {
+        s++;
+        d /= 2;
+    }while((d % 2) == 0);
+
+    for(k = 0; k < 3; k++)
+    {
+        /* A witness that is a multiple of n (only 61 when n == 61) yields
+         * a^d == 0 and would wrongly reject a prime: skip it. */
+        if((witnesses[k] % n) == 0)
+            continue;
+        if(check_witness(int_pow(witnesses[k], d, n), n, s) == 0)
+            return 0;
+    }
+    return 1;
+}
+
+
diff --git a/girepository/cmph/miller_rabin.h b/girepository/cmph/miller_rabin.h
new file mode 100644
index 00000000..42dc6ce5
--- /dev/null
+++ b/girepository/cmph/miller_rabin.h
@@ -0,0 +1,5 @@
+#ifndef _CMPH_MILLER_RABIN_H__
+#define _CMPH_MILLER_RABIN_H__
+#include "cmph_types.h"
+/* Primality check implemented in miller_rabin.c (Miller-Rabin with fixed
+ * witnesses). Returns 1 when n is accepted as prime and 0 otherwise. */
+cmph_uint8 check_primality(cmph_uint64 n);
+#endif
diff --git a/girepository/cmph/sdbm_hash.c b/girepository/cmph/sdbm_hash.c
new file mode 100644
index 00000000..2f706c9f
--- /dev/null
+++ b/girepository/cmph/sdbm_hash.c
@@ -0,0 +1,49 @@
+#include "sdbm_hash.h"
+#include <stdlib.h>
+
+/* Allocates a state object tagged as CMPH_HASH_SDBM.
+ * Returns NULL when the allocation fails; release with sdbm_state_destroy(). */
+sdbm_state_t *sdbm_state_new()
+{
+    sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
+    if (state == NULL)
+        return NULL;   /* do not write through a failed allocation */
+    state->hashfunc = CMPH_HASH_SDBM;
+    return state;
+}
+
+/* Releases a state object; it simply hands the pointer to free(). */
+void sdbm_state_destroy(sdbm_state_t *state)
+{
+ free(state);
+}
+
+/* SDBM string hash over the first keylen bytes of k.
+ * For every byte: hash = byte + (hash << 6) + (hash << 16) - hash.
+ * The state argument is not consulted. */
+cmph_uint32 sdbm_hash(sdbm_state_t *state, const char *k, cmph_uint32 keylen)
+{
+    const unsigned char *bytes = (const unsigned char *)k;
+    cmph_uint32 h = 0;
+    cmph_uint32 pos;
+
+    for (pos = 0; pos < keylen; pos++)
+        h = bytes[pos] + (h << 6) + (h << 16) - h;
+    return h;
+}
+
+
+/* The SDBM state has nothing to serialize: reports an empty dump by setting
+ * *buf to NULL and *buflen to 0. */
+void sdbm_state_dump(sdbm_state_t *state, char **buf, cmph_uint32 *buflen)
+{
+    *buflen = 0;
+    *buf = NULL;
+}
+
+/* Returns a newly allocated copy of src_state, or NULL when the allocation
+ * fails. The copy is released with sdbm_state_destroy(). */
+sdbm_state_t *sdbm_state_copy(sdbm_state_t *src_state)
+{
+    sdbm_state_t *dest_state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
+    if (dest_state == NULL)
+        return NULL;   /* do not write through a failed allocation */
+    dest_state->hashfunc = src_state->hashfunc;
+    return dest_state;
+}
+
+/* Rebuilds a state object from a dump. The dump carries no data (see
+ * sdbm_state_dump), so buf and buflen are not read. Returns NULL when the
+ * allocation fails. */
+sdbm_state_t *sdbm_state_load(const char *buf, cmph_uint32 buflen)
+{
+    sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
+    if (state == NULL)
+        return NULL;   /* do not write through a failed allocation */
+    state->hashfunc = CMPH_HASH_SDBM;
+    return state;
+}
diff --git a/girepository/cmph/sdbm_hash.h b/girepository/cmph/sdbm_hash.h
new file mode 100644
index 00000000..f44b2f15
--- /dev/null
+++ b/girepository/cmph/sdbm_hash.h
@@ -0,0 +1,18 @@
+#ifndef __SDBM_HASH_H__
+#define __SDBM_HASH_H__
+
+#include "hash.h"
+
+/* State of the SDBM hash function: only the tag identifying the hash kind. */
+typedef struct __sdbm_state_t
+{
+ CMPH_HASH hashfunc;
+} sdbm_state_t;
+
+/* Allocates a state tagged CMPH_HASH_SDBM. */
+sdbm_state_t *sdbm_state_new();
+/* Hashes the first keylen bytes of k with the SDBM recurrence. */
+cmph_uint32 sdbm_hash(sdbm_state_t *state, const char *k, cmph_uint32 keylen);
+/* Produces an empty dump: *buf = NULL, *buflen = 0. */
+void sdbm_state_dump(sdbm_state_t *state, char **buf, cmph_uint32 *buflen);
+/* Returns a newly allocated duplicate of src_state. */
+sdbm_state_t *sdbm_state_copy(sdbm_state_t *src_state);
+/* Returns a newly allocated state; the dump contents are not used. */
+sdbm_state_t *sdbm_state_load(const char *buf, cmph_uint32 buflen);
+/* Frees a state returned by the functions above. */
+void sdbm_state_destroy(sdbm_state_t *state);
+
+#endif
diff --git a/girepository/cmph/select.c b/girepository/cmph/select.c
new file mode 100644
index 00000000..fec4b7ad
--- /dev/null
+++ b/girepository/cmph/select.c
@@ -0,0 +1,337 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <limits.h>
+#include "select_lookup_tables.h"
+#include "select.h"
+
+//#define DEBUG
+#include "debug.h"
+
+// Sampling step of the select table: one entry is stored for every
+// STEP_SELECT_TABLE-th one bit (see select_generate_sel_table).
+#ifndef STEP_SELECT_TABLE
+#define STEP_SELECT_TABLE 128
+#endif
+
+// log2(STEP_SELECT_TABLE): used to divide by the step with a shift.
+#ifndef NBITS_STEP_SELECT_TABLE
+#define NBITS_STEP_SELECT_TABLE 7
+#endif
+
+// STEP_SELECT_TABLE - 1: used to take a value modulo the step with a mask.
+// The three macros must be overridden together to stay consistent.
+#ifndef MASK_STEP_SELECT_TABLE
+#define MASK_STEP_SELECT_TABLE 0x7f // 0x7f = 127
+#endif
+
+// Appends a 0 bit to the 32-bit staging word: existing bits move one position
+// toward the least significant end and the top bit becomes 0.
+static inline void select_insert_0(cmph_uint32 * buffer)
+{
+ (*buffer) >>= 1;
+};
+
+// Appends a 1 bit to the 32-bit staging word: existing bits move one position
+// toward the least significant end and the top bit is set.
+static inline void select_insert_1(cmph_uint32 * buffer)
+{
+    *buffer = (*buffer >> 1) | 0x80000000;
+}
+
+// Puts sel into the empty state: no keys, no range, no allocated arrays.
+void select_init(select_t * sel)
+{
+    sel->bits_vec = NULL;
+    sel->select_table = NULL;
+    sel->n = sel->m = 0;
+}
+
+// Returns the size of the structure in bits: the two counters n and m, the
+// bit vector words and the select table words.
+cmph_uint32 select_get_space_usage(select_t * sel)
+{
+    const cmph_uint32 word_bits = (cmph_uint32)sizeof(cmph_uint32) * 8;
+    cmph_uint32 words = 2;                                   // n and m
+
+    words += (sel->n + sel->m + 31) >> 5;                    // bit vector, (nbits + 31)/32 words
+    words += (sel->n >> NBITS_STEP_SELECT_TABLE) + 1;        // select table, n/STEP_SELECT_TABLE + 1 words
+    return words * word_bits;
+}
+
+// Frees the two arrays owned by sel and clears the pointers, so that a second
+// call (or a later select_generate/select_load) sees no stale allocation.
+// n and m are left as they were.
+void select_destroy(select_t * sel)
+{
+    free(sel->select_table);
+    sel->select_table = NULL;
+    free(sel->bits_vec);
+    sel->bits_vec = NULL;
+}
+
+// Fills sel->select_table with the bit position of every STEP_SELECT_TABLE-th
+// one bit of sel->bits_vec (ones number 0, 128, 256, ...), stopping once the
+// sampled index reaches sel->n.
+// NOTE(review): the 32-bit words are scanned byte by byte through a cast,
+// which appears to rely on a little-endian layout - confirm.
+static inline void select_generate_sel_table(select_t * sel)
+{
+ register cmph_uint8 * bits_table = (cmph_uint8 *)sel->bits_vec;
+ register cmph_uint32 part_sum, old_part_sum;
+ register cmph_uint32 vec_idx, one_idx, sel_table_idx;
+
+ part_sum = vec_idx = one_idx = sel_table_idx = 0;
+
+ for(;;)
+ {
+ // Stop once every sampled one (index < n) has been recorded.
+ if(one_idx >= sel->n)
+ break;
+ // Advance byte by byte until the running count of ones exceeds one_idx;
+ // old_part_sum is then the number of ones before the byte holding it.
+ do
+ {
+ old_part_sum = part_sum;
+ part_sum += rank_lookup_table[bits_table[vec_idx]];
+ vec_idx++;
+ } while (part_sum <= one_idx);
+
+ // Position inside the byte (from the lookup table) plus the bit offset of that byte.
+ sel->select_table[sel_table_idx] = select_lookup_table[bits_table[vec_idx - 1]][one_idx - old_part_sum] + ((vec_idx - 1) << 3); // ((vec_idx - 1) << 3) = ((vec_idx - 1) * 8)
+ one_idx += STEP_SELECT_TABLE ;
+ sel_table_idx++;
+ };
+};
+
+// Builds the select structure for the n values in keys_vec.
+// The bit vector receives one 1 bit per key and one 0 bit each time the
+// running value i is advanced, so it holds at most n + m bits. Any arrays
+// already owned by sel are freed and replaced, and the sampled select table
+// is rebuilt at the end.
+// NOTE(review): the scan presumably expects keys_vec in non-decreasing order
+// with values within [0, m] - confirm against callers.
+// NOTE(review): keys_vec[0] is read even when n == 0, and the calloc results
+// are not checked - confirm callers never hit those cases.
+void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m)
+{
+ register cmph_uint32 i, j, idx;
+ cmph_uint32 buffer = 0;
+
+ register cmph_uint32 nbits;
+ register cmph_uint32 vec_size;
+ register cmph_uint32 sel_table_size;
+ sel->n = n;
+ sel->m = m; // n values in the range [0,m-1]
+
+ nbits = sel->n + sel->m;
+ vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
+
+ sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
+
+ // Drop arrays left over from a previous generate/load before reallocating.
+ if(sel->bits_vec)
+ {
+ free(sel->bits_vec);
+ }
+ sel->bits_vec = (cmph_uint32 *)calloc(vec_size, sizeof(cmph_uint32));
+
+ if(sel->select_table)
+ {
+ free(sel->select_table);
+ }
+ sel->select_table = (cmph_uint32 *)calloc(sel_table_size, sizeof(cmph_uint32));
+
+
+
+ // idx: number of bits emitted so far; i: current value; j: next key to encode.
+ idx = i = j = 0;
+
+ for(;;)
+ {
+ // Emit a 1 for every key equal to the current value i.
+ while(keys_vec[j]==i)
+ {
+ select_insert_1(&buffer);
+ idx++;
+
+ // A full 32-bit word has been staged: store it.
+ if((idx & 0x1f) == 0 ) // (idx & 0x1f) = idx % 32
+ sel->bits_vec[(idx >> 5) - 1] = buffer; // (idx >> 5) = idx/32
+ j++;
+
+ // All keys encoded: skip the remaining zeros and flush.
+ if(j == sel->n)
+ goto loop_end;
+
+ //assert(keys_vec[j] < keys_vec[j-1]);
+ }
+
+ if(i == sel->m)
+ break;
+
+ // Emit a 0 for every step of i needed to reach the next key's value.
+ while(keys_vec[j] > i)
+ {
+ select_insert_0(&buffer);
+ idx++;
+
+ if((idx & 0x1f) == 0 ) // (idx & 0x1f) = idx % 32
+ sel->bits_vec[(idx >> 5) - 1] = buffer; // (idx >> 5) = idx/32
+ i++;
+ };
+
+ };
+ loop_end:
+ // Flush a partially filled word: the staged bits sit at the top of buffer,
+ // so shift them down to start at bit 0 before storing.
+ if((idx & 0x1f) != 0 ) // (idx & 0x1f) = idx % 32
+ {
+ buffer >>= 32 - (idx & 0x1f);
+ sel->bits_vec[ (idx - 1) >> 5 ] = buffer;
+ };
+
+ select_generate_sel_table(sel);
+};
+
+// Returns the bit position of the one bit number one_idx (0-based) in the
+// byte-addressed bit vector bits_table, using select_table to jump to the
+// nearest sampled one at or before it.
+// No bounds are checked: one_idx must refer to a one that exists in the vector.
+static inline cmph_uint32 _select_query(cmph_uint8 * bits_table, cmph_uint32 * select_table, cmph_uint32 one_idx)
+{
+ register cmph_uint32 vec_bit_idx ,vec_byte_idx;
+ register cmph_uint32 part_sum, old_part_sum;
+
+ // Bit position of the sampled one that precedes (or is) the wanted one.
+ vec_bit_idx = select_table[one_idx >> NBITS_STEP_SELECT_TABLE]; // one_idx >> NBITS_STEP_SELECT_TABLE = one_idx/STEP_SELECT_TABLE
+ vec_byte_idx = vec_bit_idx >> 3; // vec_bit_idx / 8
+
+ // Ones still to skip after the sample; the scan below restarts at the
+ // beginning of the sample's byte, so the ones below the sample inside that
+ // byte are added back.
+ one_idx &= MASK_STEP_SELECT_TABLE; // one_idx %= STEP_SELECT_TABLE == one_idx &= MASK_STEP_SELECT_TABLE
+ one_idx += rank_lookup_table[bits_table[vec_byte_idx] & ((1 << (vec_bit_idx & 0x7)) - 1)];
+ part_sum = 0;
+
+ // Advance byte by byte until the running count of ones exceeds one_idx.
+ do
+ {
+ old_part_sum = part_sum;
+ part_sum += rank_lookup_table[bits_table[vec_byte_idx]];
+ vec_byte_idx++;
+
+ }while (part_sum <= one_idx);
+
+ // Position inside the final byte plus the bit offset of that byte.
+ return select_lookup_table[bits_table[vec_byte_idx - 1]][one_idx - old_part_sum] + ((vec_byte_idx-1) << 3);
+}
+
+// Returns the bit position of the one bit number one_idx in sel's bit vector
+// (thin wrapper over _select_query).
+cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx)
+{
+ return _select_query((cmph_uint8 *)sel->bits_vec, sel->select_table, one_idx);
+};
+
+
+// Returns the bit position of the one bit that follows position vec_bit_idx
+// in bits_table, without using the select table.
+// NOTE(review): the "+ 1" below skips exactly one bit, so vec_bit_idx is
+// presumably the position of a one (e.g. a previous query result) - confirm.
+static inline cmph_uint32 _select_next_query(cmph_uint8 * bits_table, cmph_uint32 vec_bit_idx)
+{
+ register cmph_uint32 vec_byte_idx, one_idx;
+ register cmph_uint32 part_sum, old_part_sum;
+
+ vec_byte_idx = vec_bit_idx >> 3;
+
+ // Ones below vec_bit_idx inside its byte, plus one for the bit at vec_bit_idx itself.
+ one_idx = rank_lookup_table[bits_table[vec_byte_idx] & ((1U << (vec_bit_idx & 0x7)) - 1U)] + 1U;
+ part_sum = 0;
+
+ // Advance byte by byte until the running count of ones exceeds one_idx.
+ do
+ {
+ old_part_sum = part_sum;
+ part_sum += rank_lookup_table[bits_table[vec_byte_idx]];
+ vec_byte_idx++;
+
+ }while (part_sum <= one_idx);
+
+ // Position inside the final byte plus the bit offset of that byte.
+ return select_lookup_table[bits_table[(vec_byte_idx - 1)]][(one_idx - old_part_sum)] + ((vec_byte_idx - 1) << 3);
+}
+
+// Returns the position of the one bit following vec_bit_idx in sel's bit
+// vector (thin wrapper over _select_next_query).
+cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx)
+{
+ return _select_next_query((cmph_uint8 *)sel->bits_vec, vec_bit_idx);
+};
+
+// Serializes sel into a freshly calloc'ed buffer laid out as
+// [n][m][bits_vec words][select_table words]. On success *buf points to the
+// buffer (to be freed by the caller) and *buflen is its size in bytes. If the
+// allocation fails *buf is NULL and *buflen is set to UINT_MAX.
+void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen)
+{
+    const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
+    cmph_uint32 bits_bytes = ((sel->n + sel->m + 31) >> 5) * word;                 // (nbits + 31)/32 words
+    cmph_uint32 table_bytes = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * word;    // n/STEP_SELECT_TABLE + 1 words
+    char *cursor;
+
+    *buflen = 2*word + bits_bytes + table_bytes;
+    *buf = (char *)calloc(*buflen, sizeof(char));
+    if (*buf == NULL)
+    {
+        *buflen = UINT_MAX;
+        return;
+    }
+
+    cursor = *buf;
+    memcpy(cursor, &(sel->n), sizeof(cmph_uint32));
+    cursor += word;
+    memcpy(cursor, &(sel->m), sizeof(cmph_uint32));
+    cursor += word;
+    memcpy(cursor, sel->bits_vec, bits_bytes);
+    cursor += bits_bytes;
+    memcpy(cursor, sel->select_table, table_bytes);
+
+    DEBUGP("Dumped select structure with size %u bytes\n", *buflen);
+}
+
+// Rebuilds sel from a buffer produced by select_dump:
+// [n][m][bits_vec words][select_table words]. Arrays already owned by sel are
+// freed and replaced. If an allocation fails, sel is left empty (n = m = 0,
+// both pointers NULL) instead of copying through a NULL pointer.
+// buflen is only reported in the debug trace; the buffer is trusted to be as
+// large as the sizes derived from its n and m fields.
+void select_load(select_t * sel, const char *buf, cmph_uint32 buflen)
+{
+    register cmph_uint32 pos = 0;
+    register cmph_uint32 nbits = 0;
+    register cmph_uint32 vec_size = 0;
+    register cmph_uint32 sel_table_size = 0;
+
+    memcpy(&(sel->n), buf, sizeof(cmph_uint32));
+    pos += (cmph_uint32)sizeof(cmph_uint32);
+    memcpy(&(sel->m), buf + pos, sizeof(cmph_uint32));
+    pos += (cmph_uint32)sizeof(cmph_uint32);
+
+    nbits = sel->n + sel->m;
+    vec_size = ((nbits + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32); // (nbits + 31) >> 5 = (nbits + 31)/32
+    sel_table_size = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * (cmph_uint32)sizeof(cmph_uint32); // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
+
+    // free(NULL) is a no-op, so no guard is needed.
+    free(sel->bits_vec);
+    free(sel->select_table);
+    sel->bits_vec = (cmph_uint32 *)calloc(vec_size/sizeof(cmph_uint32), sizeof(cmph_uint32));
+    sel->select_table = (cmph_uint32 *)calloc(sel_table_size/sizeof(cmph_uint32), sizeof(cmph_uint32));
+
+    // A zero-sized bit vector may legitimately come back as NULL; anything
+    // else that is NULL is an allocation failure.
+    if ((vec_size != 0 && sel->bits_vec == NULL) || sel->select_table == NULL)
+    {
+        free(sel->bits_vec);
+        free(sel->select_table);
+        sel->bits_vec = NULL;
+        sel->select_table = NULL;
+        sel->n = 0;
+        sel->m = 0;
+        return;
+    }
+
+    if (vec_size != 0)
+        memcpy(sel->bits_vec, buf + pos, vec_size);
+    pos += vec_size;
+    memcpy(sel->select_table, buf + pos, sel_table_size);
+
+    DEBUGP("Loaded select structure with size %u bytes\n", buflen);
+}
+
+
+/** \fn void select_pack(select_t *sel, void *sel_packed);
+ * \brief Packs a select structure into a preallocated contiguous memory area pointed to by sel_packed.
+ * \param sel points to the select structure
+ * \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size
+ *
+ * Nothing is written when sel or sel_packed is NULL, or when the temporary
+ * dump buffer cannot be allocated.
+ */
+void select_pack(select_t *sel, void *sel_packed)
+{
+    if (sel && sel_packed)
+    {
+        char *buf = NULL;
+        cmph_uint32 buflen = 0;
+        select_dump(sel, &buf, &buflen);
+        // On allocation failure select_dump leaves buf NULL and sets buflen
+        // to UINT_MAX; copying in that state would read through NULL.
+        if (buf == NULL)
+            return;
+        memcpy(sel_packed, buf, buflen);
+        free(buf);
+    }
+}
+
+
+/** \fn cmph_uint32 select_packed_size(select_t *sel);
+ * \brief Return the amount of space needed to pack a select structure.
+ * \return the size in bytes of the packed select structure: the two counters, the bit vector and the select table
+ */
+cmph_uint32 select_packed_size(select_t *sel)
+{
+    const cmph_uint32 word = (cmph_uint32)sizeof(cmph_uint32);
+    cmph_uint32 bits_bytes = ((sel->n + sel->m + 31) >> 5) * word;                 // (nbits + 31)/32 words
+    cmph_uint32 table_bytes = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * word;    // n/STEP_SELECT_TABLE + 1 words
+
+    return 2*word + bits_bytes + table_bytes;
+}
+
+
+
+// Same query as select_query, run directly on a packed image laid out as
+// [n][m][bits_vec words][select_table words].
+cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx)
+{
+    cmph_uint32 *words = (cmph_uint32 *)sel_packed;
+    cmph_uint32 n = words[0];
+    cmph_uint32 m = words[1];
+    cmph_uint32 vec_words = (n + m + 31) >> 5;        // (nbits + 31)/32
+    cmph_uint32 *payload = words + 2;                 // first word after n and m
+
+    return _select_query((cmph_uint8 *)payload, payload + vec_words, one_idx);
+}
+
+
+// Same query as select_next_query, run directly on a packed image: the bit
+// vector starts 8 bytes in, right after the n and m counters.
+cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx)
+{
+    cmph_uint8 *bits_vec = (cmph_uint8 *)sel_packed + 8; // skipping n and m
+
+    return _select_next_query(bits_vec, vec_bit_idx);
+}
diff --git a/girepository/cmph/select.h b/girepository/cmph/select.h
new file mode 100644
index 00000000..a31eb0f2
--- /dev/null
+++ b/girepository/cmph/select.h
@@ -0,0 +1,61 @@
+#ifndef __CMPH_SELECT_H__
+#define __CMPH_SELECT_H__
+
+#include "cmph_types.h"
+
+/* Select structure over a bit vector built by select_generate. */
+struct _select_t
+{
+ /* n: number of keys (one bits); m: bound of the key range (at most m zero bits are stored) */
+ cmph_uint32 n,m;
+ /* bit vector of up to n + m bits, stored in 32-bit words; owned by the structure */
+ cmph_uint32 * bits_vec;
+ /* bit position of every 128th one bit (n/128 + 1 entries); owned by the structure */
+ cmph_uint32 * select_table;
+};
+
+typedef struct _select_t select_t;
+
+/* Sets sel to the empty state (n = m = 0, no arrays allocated). */
+void select_init(select_t * sel);
+
+/* Frees the arrays owned by sel and resets the pointers. */
+void select_destroy(select_t * sel);
+
+/* Builds the bit vector and select table for the n values of keys_vec, using m as the range bound; previous arrays of sel are freed. */
+void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m);
+
+/* Returns the bit position of the one bit number one_idx (0-based). */
+cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx);
+
+/* Returns the bit position of the one bit that follows position vec_bit_idx. */
+cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx);
+
+/* Returns the size of the structure in bits. */
+cmph_uint32 select_get_space_usage(select_t * sel);
+
+/* Serializes sel into a newly allocated buffer; on allocation failure *buf is NULL and *buflen is UINT_MAX. */
+void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen);
+
+/* Rebuilds sel from a buffer written by select_dump; previous arrays of sel are freed. */
+void select_load(select_t * sel, const char *buf, cmph_uint32 buflen);
+
+
+/** \fn void select_pack(select_t *sel, void *sel_packed);
+ * \brief Support the ability to pack a select structure into a preallocated contiguous memory space pointed by sel_packed.
+ * \param sel points to the select structure
+ * \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size
+ */
+void select_pack(select_t *sel, void *sel_packed);
+
+/** \fn cmph_uint32 select_packed_size(select_t *sel);
+ * \brief Return the amount of space needed to pack a select structure.
+ * \return the size of the packed select structure or zero for failures
+ */
+cmph_uint32 select_packed_size(select_t *sel);
+
+
+/** \fn cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
+ * \param sel_packed is a pointer to a contiguous memory area
+ * \param one_idx is the rank for which we want to calculate the inverse function select
+ * \return an integer that represents the select value of rank idx.
+ */
+cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
+
+
+/** \fn cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
+ * \param sel_packed is a pointer to a contiguous memory area
+ * \param vec_bit_idx is a value previously computed by @see select_query_packed
+ * \return an integer that represents the next select value greater than @see vec_bit_idx.
+ */
+cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
+
+#endif
diff --git a/girepository/cmph/select_lookup_tables.h b/girepository/cmph/select_lookup_tables.h
new file mode 100644
index 00000000..efd595ed
--- /dev/null
+++ b/girepository/cmph/select_lookup_tables.h
@@ -0,0 +1,170 @@
+#ifndef SELECT_LOOKUP_TABLES
+#define SELECT_LOOKUP_TABLES
+
+#include "cmph_types.h"
+
+/*
+rank_lookup_table[i] simply gives the number of bits set to one in the byte of value i.
+For example if i = 01010101 in binary then we have :
+rank_lookup_table[i] = 4
+*/
+
+static cmph_uint8 rank_lookup_table[256] ={
+ 0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4
+, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
+, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
+, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
+, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
+, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
+, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
+, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
+, 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 , 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5
+, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
+, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
+, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
+, 2 , 3 , 3 , 4 , 3 , 4 , 4 , 5 , 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6
+, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
+, 3 , 4 , 4 , 5 , 4 , 5 , 5 , 6 , 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7
+, 4 , 5 , 5 , 6 , 5 , 6 , 6 , 7 , 5 , 6 , 6 , 7 , 6 , 7 , 7 , 8
+ };
+
+/*
+select_lookup_table[i][j] simply gives the index of the j'th bit set to one in the byte of value i.
+For example if i=01010101 in binary then we have :
+select_lookup_table[i][0] = 0, the first bit set to one is at position 0
+select_lookup_table[i][1] = 2, the second bit set to one is at position 2
+select_lookup_table[i][2] = 4, the third bit set to one is at position 4
+select_lookup_table[i][3] = 6, the fourth bit set to one is at position 6
+select_lookup_table[i][4] = 255, there are no more than 4 bits set to one in i, so we return the escape value 255.
+*/
+static cmph_uint8 select_lookup_table[256][8]={
+{ 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 2 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 255 , 255 , 255 , 255 , 255 } ,
+{ 3 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 255 , 255 , 255 , 255 , 255 } ,
+{ 2 , 3 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 255 , 255 , 255 , 255 } ,
+{ 4 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 255 , 255 , 255 , 255 , 255 } ,
+{ 2 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 255 , 255 , 255 , 255 } ,
+{ 3 , 4 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 255 , 255 , 255 , 255 } ,
+{ 2 , 3 , 4 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 255 , 255 , 255 } ,
+{ 5 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 255 , 255 , 255 , 255 , 255 } ,
+{ 2 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 255 , 255 , 255 , 255 } ,
+{ 3 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 255 , 255 , 255 , 255 } ,
+{ 2 , 3 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 5 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 255 , 255 , 255 } ,
+{ 4 , 5 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 255 , 255 , 255 , 255 } ,
+{ 2 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 5 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 255 , 255 , 255 } ,
+{ 3 , 4 , 5 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 5 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 255 , 255 , 255 } ,
+{ 2 , 3 , 4 , 5 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 5 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 255 , 255 } ,
+{ 6 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 6 , 255 , 255 , 255 , 255 , 255 } ,
+{ 2 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 6 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 6 , 255 , 255 , 255 , 255 } ,
+{ 3 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 6 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 6 , 255 , 255 , 255 , 255 } ,
+{ 2 , 3 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 6 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 6 , 255 , 255 , 255 } ,
+{ 4 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 6 , 255 , 255 , 255 , 255 } ,
+{ 2 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 6 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 6 , 255 , 255 , 255 } ,
+{ 3 , 4 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 6 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 6 , 255 , 255 , 255 } ,
+{ 2 , 3 , 4 , 6 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 6 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 6 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 6 , 255 , 255 } ,
+{ 5 , 6 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 6 , 255 , 255 , 255 , 255 } ,
+{ 2 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 6 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 6 , 255 , 255 , 255 } ,
+{ 3 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 6 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 6 , 255 , 255 , 255 } ,
+{ 2 , 3 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 6 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 5 , 6 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 6 , 255 , 255 } ,
+{ 4 , 5 , 6 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 6 , 255 , 255 , 255 } ,
+{ 2 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 6 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 5 , 6 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 6 , 255 , 255 } ,
+{ 3 , 4 , 5 , 6 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 6 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 5 , 6 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 6 , 255 , 255 } ,
+{ 2 , 3 , 4 , 5 , 6 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 6 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 5 , 6 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 255 } ,
+{ 7 , 255 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 7 , 255 , 255 , 255 , 255 , 255 } ,
+{ 2 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 7 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 7 , 255 , 255 , 255 , 255 } ,
+{ 3 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 7 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 7 , 255 , 255 , 255 , 255 } ,
+{ 2 , 3 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 7 , 255 , 255 , 255 } ,
+{ 4 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 7 , 255 , 255 , 255 , 255 } ,
+{ 2 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 7 , 255 , 255 , 255 } ,
+{ 3 , 4 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 7 , 255 , 255 , 255 } ,
+{ 2 , 3 , 4 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 7 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 7 , 255 , 255 } ,
+{ 5 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 7 , 255 , 255 , 255 , 255 } ,
+{ 2 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 7 , 255 , 255 , 255 } ,
+{ 3 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 7 , 255 , 255 , 255 } ,
+{ 2 , 3 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 7 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 5 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 7 , 255 , 255 } ,
+{ 4 , 5 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 7 , 255 , 255 , 255 } ,
+{ 2 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 7 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 5 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 7 , 255 , 255 } ,
+{ 3 , 4 , 5 , 7 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 7 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 5 , 7 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 7 , 255 , 255 } ,
+{ 2 , 3 , 4 , 5 , 7 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 7 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 5 , 7 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 7 , 255 } ,
+{ 6 , 7 , 255 , 255 , 255 , 255 , 255 , 255 } , { 0 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } ,
+{ 1 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 1 , 6 , 7 , 255 , 255 , 255 , 255 } ,
+{ 2 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 2 , 6 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 2 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 2 , 6 , 7 , 255 , 255 , 255 } ,
+{ 3 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 3 , 6 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 3 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 3 , 6 , 7 , 255 , 255 , 255 } ,
+{ 2 , 3 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 3 , 6 , 7 , 255 , 255 , 255 } ,
+{ 1 , 2 , 3 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 3 , 6 , 7 , 255 , 255 } ,
+{ 4 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 4 , 6 , 7 , 255 , 255 , 255 } ,
+{ 2 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 4 , 6 , 7 , 255 , 255 , 255 } ,
+{ 1 , 2 , 4 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 4 , 6 , 7 , 255 , 255 } ,
+{ 3 , 4 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 3 , 4 , 6 , 7 , 255 , 255 , 255 } ,
+{ 1 , 3 , 4 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 3 , 4 , 6 , 7 , 255 , 255 } ,
+{ 2 , 3 , 4 , 6 , 7 , 255 , 255 , 255 } , { 0 , 2 , 3 , 4 , 6 , 7 , 255 , 255 } ,
+{ 1 , 2 , 3 , 4 , 6 , 7 , 255 , 255 } , { 0 , 1 , 2 , 3 , 4 , 6 , 7 , 255 } ,
+{ 5 , 6 , 7 , 255 , 255 , 255 , 255 , 255 } , { 0 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } ,
+{ 1 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 1 , 5 , 6 , 7 , 255 , 255 , 255 } ,
+{ 2 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 2 , 5 , 6 , 7 , 255 , 255 , 255 } ,
+{ 1 , 2 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 2 , 5 , 6 , 7 , 255 , 255 } ,
+{ 3 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 3 , 5 , 6 , 7 , 255 , 255 , 255 } ,
+{ 1 , 3 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 3 , 5 , 6 , 7 , 255 , 255 } ,
+{ 2 , 3 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 2 , 3 , 5 , 6 , 7 , 255 , 255 } ,
+{ 1 , 2 , 3 , 5 , 6 , 7 , 255 , 255 } , { 0 , 1 , 2 , 3 , 5 , 6 , 7 , 255 } ,
+{ 4 , 5 , 6 , 7 , 255 , 255 , 255 , 255 } , { 0 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } ,
+{ 1 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 1 , 4 , 5 , 6 , 7 , 255 , 255 } ,
+{ 2 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 2 , 4 , 5 , 6 , 7 , 255 , 255 } ,
+{ 1 , 2 , 4 , 5 , 6 , 7 , 255 , 255 } , { 0 , 1 , 2 , 4 , 5 , 6 , 7 , 255 } ,
+{ 3 , 4 , 5 , 6 , 7 , 255 , 255 , 255 } , { 0 , 3 , 4 , 5 , 6 , 7 , 255 , 255 } ,
+{ 1 , 3 , 4 , 5 , 6 , 7 , 255 , 255 } , { 0 , 1 , 3 , 4 , 5 , 6 , 7 , 255 } ,
+{ 2 , 3 , 4 , 5 , 6 , 7 , 255 , 255 } , { 0 , 2 , 3 , 4 , 5 , 6 , 7 , 255 } ,
+{ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 255 } , { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 } };
+
+#endif
diff --git a/girepository/cmph/vqueue.c b/girepository/cmph/vqueue.c
new file mode 100644
index 00000000..0619dd7c
--- /dev/null
+++ b/girepository/cmph/vqueue.c
@@ -0,0 +1,51 @@
+#include "vqueue.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+/* Fixed-capacity circular queue of 32-bit values. */
+struct __vqueue_t
+{
+ /* storage of `capacity` slots */
+ cmph_uint32 * values;
+ /* beg: slot just before the oldest element; end: slot of the newest element;
+  * capacity: number of slots (requested capacity + 1). beg == end means empty. */
+ cmph_uint32 beg, end, capacity;
+};
+
+/* Creates an empty queue able to hold `capacity` values. One extra slot is
+ * allocated so that a full queue can be told apart from an empty one.
+ * Allocation failures are caught by assert, for the storage array as well as
+ * for the queue object. */
+vqueue_t * vqueue_new(cmph_uint32 capacity)
+{
+    size_t capacity_plus_one = capacity + 1;
+    vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
+    assert(q);
+    q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32));
+    assert(q->values);   /* vqueue_insert writes through this pointer */
+    q->beg = q->end = 0;
+    q->capacity = (cmph_uint32) capacity_plus_one;
+    return q;
+}
+
+/* Returns 1 when the queue holds no element (beg == end), 0 otherwise. */
+cmph_uint8 vqueue_is_empty(vqueue_t * q)
+{
+ return (cmph_uint8)(q->beg == q->end);
+}
+
+/* Appends val at the tail of the queue. Inserting into a full queue is a
+ * programming error caught by assert. */
+void vqueue_insert(vqueue_t * q, cmph_uint32 val)
+{
+    cmph_uint32 tail = (q->end + 1)%q->capacity;
+
+    assert(tail != q->beg); // Is queue full?
+    q->end = tail;
+    q->values[tail] = val;
+}
+
+/* Removes and returns the oldest value. Removing from an empty queue is a
+ * programming error caught by assert. */
+cmph_uint32 vqueue_remove(vqueue_t * q)
+{
+    cmph_uint32 head;
+
+    assert(!vqueue_is_empty(q)); // Is queue empty?
+    head = (q->beg + 1)%q->capacity;
+    q->beg = head;
+    return q->values[head];
+}
+
+/* Prints the queued values to stderr, oldest first, one per line. */
+void vqueue_print(vqueue_t * q)
+{
+    cmph_uint32 slot = q->beg;
+
+    while (slot != q->end)
+    {
+        slot = (slot + 1)%q->capacity;
+        fprintf(stderr, "%u\n", q->values[slot]);
+    }
+}
+
+/* Frees the storage array and then the queue object itself. */
+void vqueue_destroy(vqueue_t *q)
+{
+    free(q->values);
+    q->values = NULL;
+    free(q);
+}
diff --git a/girepository/cmph/vqueue.h b/girepository/cmph/vqueue.h
new file mode 100644
index 00000000..86fccab6
--- /dev/null
+++ b/girepository/cmph/vqueue.h
@@ -0,0 +1,18 @@
+#ifndef __CMPH_VQUEUE_H__
+#define __CMPH_VQUEUE_H__
+
+#include "cmph_types.h"
+/* Opaque fixed-capacity FIFO queue of cmph_uint32 values. */
+typedef struct __vqueue_t vqueue_t;
+
+/* Creates an empty queue that can hold `capacity` values. */
+vqueue_t * vqueue_new(cmph_uint32 capacity);
+
+/* Returns 1 when the queue is empty, 0 otherwise. */
+cmph_uint8 vqueue_is_empty(vqueue_t * q);
+
+/* Appends val; the queue must not be full (checked with assert). */
+void vqueue_insert(vqueue_t * q, cmph_uint32 val);
+
+/* Removes and returns the oldest value; the queue must not be empty (checked with assert). */
+cmph_uint32 vqueue_remove(vqueue_t * q);
+
+/* Prints the queued values to stderr, oldest first. */
+void vqueue_print(vqueue_t * q);
+
+/* Frees the queue and its storage. */
+void vqueue_destroy(vqueue_t * q);
+#endif
diff --git a/girepository/cmph/vstack.c b/girepository/cmph/vstack.c
new file mode 100644
index 00000000..24555cd6
--- /dev/null
+++ b/girepository/cmph/vstack.c
@@ -0,0 +1,79 @@
+#include "vstack.h"
+
+#include <stdlib.h>
+#include <assert.h>
+
+//#define DEBUG
+#include "debug.h"
+
+struct __vstack_t /* growable LIFO stack of cmph_uint32 values */
+{
+ cmph_uint32 pointer; /* number of stored values; also the index of the next free slot */
+ cmph_uint32 *values; /* heap array, NULL until vstack_reserve() first grows it */
+ cmph_uint32 capacity; /* number of slots currently allocated in values */
+};
+
+vstack_t *vstack_new() /* Allocates an empty stack; release it with vstack_destroy(). */
+{
+    vstack_t *stack = (vstack_t *)malloc(sizeof(vstack_t));
+    assert(stack);
+    /* start with no values and no storage; vstack_reserve() allocates on demand */
+    stack->capacity = stack->pointer = 0;
+    stack->values = NULL;
+    return stack;
+}
+
+void vstack_destroy(vstack_t *stack) /* Frees the value array and then the stack object itself. */
+{
+ assert(stack);
+ free(stack->values); /* values is still NULL if nothing was ever reserved; free(NULL) is a no-op */
+ free(stack);
+}
+
+void vstack_push(vstack_t *stack, cmph_uint32 val) /* Pushes val on top, growing the storage when needed. */
+{
+    assert(stack);
+    /* make sure a free slot exists at index `pointer` before writing to it */
+    vstack_reserve(stack, stack->pointer + 1);
+    stack->values[stack->pointer++] = val;
+}
+void vstack_pop(vstack_t *stack) /* Discards the top value; the stack must not be empty. */
+{
+    assert(stack);
+    assert(stack->pointer > 0);
+    stack->pointer -= 1; /* the slot content is left in place and simply becomes free */
+}
+
+cmph_uint32 vstack_top(vstack_t *stack) /* Returns the most recently pushed value without removing it. */
+{
+    assert(stack);
+    assert(stack->pointer > 0);
+    return *(stack->values + (stack->pointer - 1)); /* pointer indexes the next free slot, so the top is one below */
+}
+int vstack_empty(vstack_t *stack) /* Returns 1 when the stack holds no values, 0 otherwise. */
+{
+    assert(stack);
+    return !stack->pointer;
+}
+cmph_uint32 vstack_size(vstack_t *stack) /* Returns the number of values currently stored; unlike its siblings it does not assert that stack is non-NULL. */
+{
+ return stack->pointer; /* pointer counts the stored values */
+}
+void vstack_reserve(vstack_t *stack, cmph_uint32 size) /* Grows the storage to hold at least `size` values; never shrinks it. */
+{
+    assert(stack);
+    if (stack->capacity < size)
+    {
+        cmph_uint32 new_capacity = stack->capacity ? stack->capacity : 1; /* double from the current size so repeated pushes grow geometrically instead of one slot at a time */
+        DEBUGP("Increasing current capacity %u to %u\n", stack->capacity, size);
+        while (new_capacity < size) /* past half the 32-bit range doubling would wrap to 0 and never end: settle for `size` */
+        {
+            new_capacity = (new_capacity > ((cmph_uint32)-1) / 2) ? size : new_capacity * 2;
+        }
+        stack->values = (cmph_uint32 *)realloc(stack->values, sizeof(cmph_uint32)*new_capacity);
+        assert(stack->values); /* allocation failure is treated as fatal, as in vstack_new() */
+        stack->capacity = new_capacity;
+        DEBUGP("Increased\n");
+    }
+}
+
diff --git a/girepository/cmph/vstack.h b/girepository/cmph/vstack.h
new file mode 100644
index 00000000..1cefaaff
--- /dev/null
+++ b/girepository/cmph/vstack.h
@@ -0,0 +1,18 @@
+#ifndef __CMPH_VSTACK_H__
+#define __CMPH_VSTACK_H__
+
+#include "cmph_types.h"
+typedef struct __vstack_t vstack_t; /* opaque growable stack of cmph_uint32 values; the struct is defined in vstack.c */
+
+vstack_t *vstack_new(); /* create an empty stack; no storage is allocated until it is needed */
+void vstack_destroy(vstack_t *stack); /* free the stack and its storage */
+
+void vstack_push(vstack_t *stack, cmph_uint32 val); /* push val, growing the storage if necessary */
+cmph_uint32 vstack_top(vstack_t *stack); /* return the top value without removing it; asserts the stack is non-empty */
+void vstack_pop(vstack_t *stack); /* remove the top value; asserts the stack is non-empty */
+int vstack_empty(vstack_t *stack); /* non-zero when the stack holds no values */
+cmph_uint32 vstack_size(vstack_t *stack); /* number of values currently stored */
+
+void vstack_reserve(vstack_t *stack, cmph_uint32 size); /* make room for at least `size` values; never shrinks */
+
+#endif
diff --git a/girepository/cmph/wingetopt.c b/girepository/cmph/wingetopt.c
new file mode 100644
index 00000000..c981d0f0
--- /dev/null
+++ b/girepository/cmph/wingetopt.c
@@ -0,0 +1,179 @@
+#ifdef WIN32
+/*****************************************************************************
+ *
+ * MODULE NAME : GETOPT.C
+ *
+ * COPYRIGHTS:
+ * This module contains code made available by IBM
+ * Corporation on an AS IS basis. Any one receiving the
+ * module is considered to be licensed under IBM copyrights
+ * to use the IBM-provided source code in any way he or she
+ * deems fit, including copying it, compiling it, modifying
+ * it, and redistributing it, with or without
+ * modifications. No license under any IBM patents or
+ * patent applications is to be implied from this copyright
+ * license.
+ *
+ * A user of the module should understand that IBM cannot
+ * provide technical support for the module and will not be
+ * responsible for any consequences of use of the program.
+ *
+ * Any notices, including this one, are not to be removed
+ * from the module without the prior written consent of
+ * IBM.
+ *
+ * AUTHOR: Original author:
+ * G. R. Blair (BOBBLAIR at AUSVM1)
+ * Internet: bobblair@bobblair.austin.ibm.com
+ *
+ * Extensively revised by:
+ * John Q. Walker II, Ph.D. (JOHHQ at RALVM6)
+ * Internet: johnq@ralvm6.vnet.ibm.com
+ *
+ *****************************************************************************/
+
+/******************************************************************************
+ * getopt()
+ *
+ * The getopt() function is a command line parser. It returns the next
+ * option character in argv that matches an option character in opstring.
+ *
+ * The argv argument points to an array of argc+1 elements containing argc
+ * pointers to character strings followed by a null pointer.
+ *
+ * The opstring argument points to a string of option characters; if an
+ * option character is followed by a colon, the option is expected to have
+ * an argument that may or may not be separated from it by white space.
+ * The external variable optarg is set to point to the start of the option
+ * argument on return from getopt().
+ *
+ * The getopt() function places in optind the argv index of the next argument
+ * to be processed. The system initializes the external variable optind to
+ * 1 before the first call to getopt().
+ *
+ * When all options have been processed (that is, up to the first nonoption
+ * argument), getopt() returns EOF. The special option "--" may be used to
+ * delimit the end of the options; EOF will be returned, and "--" will be
+ * skipped.
+ *
+ * The getopt() function returns a question mark (?) when it encounters an
+ * option character not included in opstring and opterr is non-zero; when
+ * opterr is zero, the unrecognized character itself is returned instead.
+ * For a recognized option, it returns the option character that was detected.
+ *
+ * If the special option "--" is detected, or all options have been
+ * processed, EOF is returned.
+ *
+ * Options are marked by either a minus sign (-) or a slash (/).
+ *
+ * No errors are defined.
+ *****************************************************************************/
+
+#include <stdio.h> /* for EOF */
+#include <string.h> /* for strchr() */
+
+/* global variables exported by getopt(); they are only declared here, the definitions in this import live in wingetopt.h */
+extern char *optarg; /* pointer to the start of the option argument */
+extern int optind; /* number of the next argv[] to be evaluated */
+extern int opterr; /* non-zero if a question mark should be returned
+ when a non-valid option character is detected */
+
+/* handle possible future character set concerns by putting this in a macro; yields the character right after *string */
+#define _next_char(string) ((char)(*((string) + 1)))
+
+int getopt(int argc, char *argv[], char *opstring)
+{
+    static char *pIndexPosition = NULL; /* place inside current argv string */
+    char *pArgString = NULL;            /* where to start from next */
+    char *pOptString;                   /* the string in our program */
+    /* Returns the next option letter; '?' (or the offending character when opterr is 0) for a bad option; EOF when options run out. */
+
+    if (pIndexPosition != NULL) {
+        /* we last left off inside an argv string */
+        if (*(++pIndexPosition))
+            pArgString = pIndexPosition; /* there is more to come in the most recent argv */
+        else
+            pIndexPosition = NULL; /* that argv is used up: drop the pointer so no later call can step past its terminator */
+    }
+
+    if (pArgString == NULL) {
+        /* we didn't leave off in the middle of an argv string */
+        if (optind >= argc) {
+            /* optind has reached argc: no command-line arguments are left */
+            pIndexPosition = NULL; /* not in the middle of anything */
+            return EOF;            /* used up all command-line arguments */
+        }
+
+        /*---------------------------------------------------------------------
+         * If the next argv[] is not an option, there can be no more options.
+         *-------------------------------------------------------------------*/
+        pArgString = argv[optind++]; /* set this to the next argument ptr */
+
+        if (('/' != *pArgString) && /* doesn't start with a slash or a dash? */
+            ('-' != *pArgString)) {
+            --optind;              /* point to current arg once we're done */
+            optarg = NULL;         /* no argument follows the option */
+            pIndexPosition = NULL; /* not in the middle of anything */
+            return EOF;            /* used up all the command-line flags */
+        }
+
+        /* check for special end-of-flags markers */
+        if ((strcmp(pArgString, "-") == 0) ||
+            (strcmp(pArgString, "--") == 0)) {
+            optarg = NULL;         /* no argument follows the option */
+            pIndexPosition = NULL; /* not in the middle of anything */
+            return EOF;            /* encountered the special flag */
+        }
+
+        pArgString++; /* look past the / or - */
+    }
+
+    if (':' == *pArgString) { /* is it a colon? */
+        /*---------------------------------------------------------------------
+         * Rare case: if opterr is non-zero, return a question mark;
+         * otherwise, just return the colon we're on.
+         *-------------------------------------------------------------------*/
+        return (opterr ? (int)'?' : (int)':');
+    }
+    else if ('\0' == *pArgString || (pOptString = strchr(opstring, *pArgString)) == NULL) {
+        /*---------------------------------------------------------------------
+         * The letter on the command-line wasn't any good (a lone "/" ends up here too: strchr() would otherwise match opstring's terminator).
+         *-------------------------------------------------------------------*/
+        optarg = NULL;         /* no argument follows the option */
+        pIndexPosition = NULL; /* not in the middle of anything */
+        return (opterr ? (int)'?' : (int)*pArgString);
+    }
+    else {
+        /*---------------------------------------------------------------------
+         * The letter on the command-line matches one we expect to see
+         *-------------------------------------------------------------------*/
+        if (':' == _next_char(pOptString)) { /* is the next letter a colon? */
+            /* It is a colon. Look for an argument string. */
+            if ('\0' != _next_char(pArgString)) { /* argument in this argv? */
+                optarg = &pArgString[1]; /* Yes, it is */
+            }
+            else {
+                /*-------------------------------------------------------------
+                 * The argument string must be in the next argv.
+                 * But, what if there is none (bad input from the user)?
+                 * In that case, return '?' (or the letter when opterr is zero), and optarg as NULL.
+                 *-----------------------------------------------------------*/
+                if (optind < argc)
+                    optarg = argv[optind++];
+                else {
+                    optarg = NULL;
+                    return (opterr ? (int)'?' : (int)*pArgString);
+                }
+            }
+            pIndexPosition = NULL; /* not in the middle of anything */
+        }
+        else {
+            /* it's not a colon, so just return the letter */
+            optarg = NULL;               /* no argument follows the option */
+            pIndexPosition = pArgString; /* point to the letter we're on */
+        }
+        return (int)*pArgString; /* return the letter that matched */
+    }
+}
+
+#endif //WIN32
diff --git a/girepository/cmph/wingetopt.h b/girepository/cmph/wingetopt.h
new file mode 100644
index 00000000..9596853d
--- /dev/null
+++ b/girepository/cmph/wingetopt.h
@@ -0,0 +1,25 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef WIN32
+ #include <getopt.h> /* non-Windows builds take getopt from the system header */
+#else
+ #ifndef _GETOPT_
+ #define _GETOPT_
+
+ #include <stdio.h> /* for EOF */
+ #include <string.h> /* for strchr() */
+ /* NOTE(review): the three variables below are definitions, not extern declarations (wingetopt.c only declares them extern); including this header from more than one translation unit would define them repeatedly — confirm it is included exactly once per program */
+ char *optarg = NULL; /* pointer to the start of the option argument */
+ int optind = 1; /* number of the next argv[] to be evaluated */
+ int opterr = 1; /* non-zero if a question mark should be returned */
+
+ int getopt(int argc, char *argv[], char *opstring); /* Windows replacement implemented in wingetopt.c */
+ #endif //_GETOPT_
+#endif //WIN32
+
+#ifdef __cplusplus
+}
+#endif
+