summaryrefslogtreecommitdiff
path: root/girepository/cmph/compressed_seq.c
diff options
context:
space:
mode:
Diffstat (limited to 'girepository/cmph/compressed_seq.c')
-rw-r--r--girepository/cmph/compressed_seq.c378
1 files changed, 378 insertions, 0 deletions
diff --git a/girepository/cmph/compressed_seq.c b/girepository/cmph/compressed_seq.c
new file mode 100644
index 00000000..e558196d
--- /dev/null
+++ b/girepository/cmph/compressed_seq.c
@@ -0,0 +1,378 @@
+#include "compressed_seq.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+#include "bitbool.h"
+
+// #define DEBUG
+#include "debug.h"
+
+static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
+{
+ register cmph_uint32 res = 0;
+
+ while(x > 1)
+ {
+ x >>= 1;
+ res++;
+ }
+ return res;
+};
+
+void compressed_seq_init(compressed_seq_t * cs)
+{
+ select_init(&cs->sel);
+ cs->n = 0;
+ cs->rem_r = 0;
+ cs->length_rems = 0;
+ cs->total_length = 0;
+ cs->store_table = 0;
+}
+
+void compressed_seq_destroy(compressed_seq_t * cs)
+{
+ free(cs->store_table);
+ cs->store_table = 0;
+ free(cs->length_rems);
+ cs->length_rems = 0;
+ select_destroy(&cs->sel);
+};
+
+
+void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
+{
+ register cmph_uint32 i;
+ // lengths: represents lengths of encoded values
+ register cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 stored_value;
+
+ cs->n = n;
+ cs->total_length = 0;
+
+ for(i = 0; i < cs->n; i++)
+ {
+ if(vals_table[i] == 0)
+ {
+ lengths[i] = 0;
+ }
+ else
+ {
+ lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
+ cs->total_length += lengths[i];
+ };
+ };
+
+ if(cs->store_table)
+ {
+ free(cs->store_table);
+ }
+ cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
+ cs->total_length = 0;
+
+ for(i = 0; i < cs->n; i++)
+ {
+ if(vals_table[i] == 0)
+ continue;
+ stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
+ set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
+ cs->total_length += lengths[i];
+ };
+
+ cs->rem_r = compressed_seq_i_log2(cs->total_length/cs->n);
+
+ if(cs->rem_r == 0)
+ {
+ cs->rem_r = 1;
+ }
+
+ if(cs->length_rems)
+ {
+ free(cs->length_rems);
+ }
+
+ cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));
+
+ rems_mask = (1U << cs->rem_r) - 1U;
+ cs->total_length = 0;
+
+ for(i = 0; i < cs->n; i++)
+ {
+ cs->total_length += lengths[i];
+ set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
+ lengths[i] = cs->total_length >> cs->rem_r;
+ };
+
+ select_init(&cs->sel);
+
+ // FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
+ // I changed the select structure to work up to m, instead of up to m - 1.
+ select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));
+
+ free(lengths);
+};
+
+cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
+{
+ register cmph_uint32 space_usage = select_get_space_usage(&cs->sel);
+ space_usage += ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32) * 8;
+ space_usage += BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32) * 8;
+ return 4 * (cmph_uint32)sizeof(cmph_uint32) * 8 + space_usage;
+}
+
+cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
+{
+ register cmph_uint32 enc_idx, enc_length;
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 stored_value;
+ register cmph_uint32 sel_res;
+
+ assert(idx < cs->n); // FABIANO ADDED
+
+ rems_mask = (1U << cs->rem_r) - 1U;
+
+ if(idx == 0)
+ {
+ enc_idx = 0;
+ sel_res = select_query(&cs->sel, idx);
+ }
+ else
+ {
+ sel_res = select_query(&cs->sel, idx - 1);
+
+ enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
+ enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);
+
+ sel_res = select_next_query(&cs->sel, sel_res);
+ };
+
+ enc_length = (sel_res - idx) << cs->rem_r;
+ enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
+ enc_length -= enc_idx;
+ if(enc_length == 0)
+ return 0;
+
+ stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
+ return stored_value + ((1U << enc_length) - 1U);
+};
+
+void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
+{
+ register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
+ register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
+ register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
+ register cmph_uint32 pos = 0;
+ char * buf_sel = 0;
+ cmph_uint32 buflen_sel = 0;
+
+ *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;
+
+ DEBUGP("sel_size = %u\n", sel_size);
+ DEBUGP("length_rems_size = %u\n", length_rems_size);
+ DEBUGP("store_table_size = %u\n", store_table_size);
+ *buf = (char *)calloc(*buflen, sizeof(char));
+
+ if (!*buf)
+ {
+ *buflen = UINT_MAX;
+ return;
+ }
+
+ // dumping n, rem_r and total_length
+ memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("n = %u\n", cs->n);
+
+ memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("rem_r = %u\n", cs->rem_r);
+
+ memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("total_length = %u\n", cs->total_length);
+
+
+ // dumping sel
+ select_dump(&cs->sel, &buf_sel, &buflen_sel);
+ memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+ memcpy(*buf + pos, buf_sel, buflen_sel);
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ for(i = 0; i < buflen_sel; i++)
+ {
+ DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
+ }
+ #endif
+ pos += buflen_sel;
+
+ free(buf_sel);
+
+ // dumping length_rems
+ memcpy(*buf + pos, cs->length_rems, length_rems_size);
+ #ifdef DEBUG
+ for(i = 0; i < length_rems_size; i++)
+ {
+ DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
+ }
+ #endif
+ pos += length_rems_size;
+
+ // dumping store_table
+ memcpy(*buf + pos, cs->store_table, store_table_size);
+
+ #ifdef DEBUG
+ for(i = 0; i < store_table_size; i++)
+ {
+ DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
+ }
+ #endif
+ DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
+}
+
+void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
+{
+ register cmph_uint32 pos = 0;
+ cmph_uint32 buflen_sel = 0;
+ register cmph_uint32 length_rems_size = 0;
+ register cmph_uint32 store_table_size = 0;
+
+ // loading n, rem_r and total_length
+ memcpy(&(cs->n), buf, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("n = %u\n", cs->n);
+
+ memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("rem_r = %u\n", cs->rem_r);
+
+ memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("total_length = %u\n", cs->total_length);
+
+ // loading sel
+ memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+ select_load(&cs->sel, buf + pos, buflen_sel);
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ for(i = 0; i < buflen_sel; i++)
+ {
+ DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
+ }
+ #endif
+ pos += buflen_sel;
+
+ // loading length_rems
+ if(cs->length_rems)
+ {
+ free(cs->length_rems);
+ }
+ length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r);
+ cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
+ length_rems_size *= 4;
+ memcpy(cs->length_rems, buf + pos, length_rems_size);
+
+ #ifdef DEBUG
+ for(i = 0; i < length_rems_size; i++)
+ {
+ DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
+ }
+ #endif
+ pos += length_rems_size;
+
+ // loading store_table
+ store_table_size = ((cs->total_length + 31) >> 5);
+ if(cs->store_table)
+ {
+ free(cs->store_table);
+ }
+ cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
+ store_table_size *= 4;
+ memcpy(cs->store_table, buf + pos, store_table_size);
+
+ #ifdef DEBUG
+ for(i = 0; i < store_table_size; i++)
+ {
+ DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
+ }
+ #endif
+
+ DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
+}
+
+void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
+{
+ if (cs && cs_packed)
+ {
+ char *buf = NULL;
+ cmph_uint32 buflen = 0;
+ compressed_seq_dump(cs, &buf, &buflen);
+ memcpy(cs_packed, buf, buflen);
+ free(buf);
+ }
+
+}
+
+cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
+{
+ register cmph_uint32 sel_size = select_packed_size(&cs->sel);
+ register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
+ register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
+ return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + store_table_size + length_rems_size;
+}
+
+
+cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
+{
+ // unpacking cs_packed
+ register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
+ register cmph_uint32 n = *ptr++;
+ register cmph_uint32 rem_r = *ptr++;
+ ptr++; // skipping total_length
+// register cmph_uint32 total_length = *ptr++;
+ register cmph_uint32 buflen_sel = *ptr++;
+ register cmph_uint32 * sel_packed = ptr;
+ register cmph_uint32 * length_rems = (ptr += (buflen_sel >> 2));
+ register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(n, rem_r);
+ register cmph_uint32 * store_table = (ptr += length_rems_size);
+
+ // compressed sequence query computation
+ register cmph_uint32 enc_idx, enc_length;
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 stored_value;
+ register cmph_uint32 sel_res;
+
+ rems_mask = (1U << rem_r) - 1U;
+
+ if(idx == 0)
+ {
+ enc_idx = 0;
+ sel_res = select_query_packed(sel_packed, idx);
+ }
+ else
+ {
+ sel_res = select_query_packed(sel_packed, idx - 1);
+
+ enc_idx = (sel_res - (idx - 1)) << rem_r;
+ enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);
+
+ sel_res = select_next_query_packed(sel_packed, sel_res);
+ };
+
+ enc_length = (sel_res - idx) << rem_r;
+ enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
+ enc_length -= enc_idx;
+ if(enc_length == 0)
+ return 0;
+
+ stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
+ return stored_value + ((1U << enc_length) - 1U);
+}