summaryrefslogtreecommitdiff
path: root/girepository/cmph/compressed_rank.c
diff options
context:
space:
mode:
Diffstat (limited to 'girepository/cmph/compressed_rank.c')
-rw-r--r--girepository/cmph/compressed_rank.c321
1 files changed, 321 insertions, 0 deletions
diff --git a/girepository/cmph/compressed_rank.c b/girepository/cmph/compressed_rank.c
new file mode 100644
index 00000000..822b2e15
--- /dev/null
+++ b/girepository/cmph/compressed_rank.c
@@ -0,0 +1,321 @@
+#include<stdlib.h>
+#include<stdio.h>
+#include<limits.h>
+#include<string.h>
+#include"compressed_rank.h"
+#include"bitbool.h"
+// #define DEBUG
+#include"debug.h"
+static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
+{
+ register cmph_uint32 res = 0;
+
+ while(x > 1)
+ {
+ x >>= 1;
+ res++;
+ }
+ return res;
+};
+
+void compressed_rank_init(compressed_rank_t * cr)
+{
+ cr->max_val = 0;
+ cr->n = 0;
+ cr->rem_r = 0;
+ select_init(&cr->sel);
+ cr->vals_rems = 0;
+}
+
+void compressed_rank_destroy(compressed_rank_t * cr)
+{
+ free(cr->vals_rems);
+ cr->vals_rems = 0;
+ select_destroy(&cr->sel);
+}
+
+void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
+{
+ register cmph_uint32 i,j;
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 * select_vec = 0;
+ cr->n = n;
+ cr->max_val = vals_table[cr->n - 1];
+ cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
+ if(cr->rem_r == 0)
+ {
+ cr->rem_r = 1;
+ }
+ select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
+ cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
+ rems_mask = (1U << cr->rem_r) - 1U;
+
+ for(i = 0; i < cr->n; i++)
+ {
+ set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
+ }
+
+ for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
+ {
+ while(i > (vals_table[j] >> cr->rem_r))
+ {
+ j++;
+ }
+ select_vec[i - 1] = j;
+ };
+
+
+ // FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
+ // I changed the select structure to work up to m, instead of up to m - 1.
+ select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
+
+ free(select_vec);
+}
+
+cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
+{
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 val_quot, val_rem;
+ register cmph_uint32 sel_res, rank;
+
+ if(idx > cr->max_val)
+ {
+ return cr->n;
+ }
+
+ val_quot = idx >> cr->rem_r;
+ rems_mask = (1U << cr->rem_r) - 1U;
+ val_rem = idx & rems_mask;
+ if(val_quot == 0)
+ {
+ rank = sel_res = 0;
+ }
+ else
+ {
+ sel_res = select_query(&cr->sel, val_quot - 1) + 1;
+ rank = sel_res - val_quot;
+ }
+
+ do
+ {
+ if(GETBIT32(cr->sel.bits_vec, sel_res))
+ {
+ break;
+ }
+ if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
+ {
+ break;
+ }
+ sel_res++;
+ rank++;
+ } while(1);
+
+ return rank;
+}
+
+cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
+{
+ register cmph_uint32 space_usage = select_get_space_usage(&cr->sel);
+ space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
+ space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8;
+ return space_usage;
+}
+
+void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
+{
+ register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
+ register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
+ register cmph_uint32 pos = 0;
+ char * buf_sel = 0;
+ cmph_uint32 buflen_sel = 0;
+
+ *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
+
+ DEBUGP("sel_size = %u\n", sel_size);
+ DEBUGP("vals_rems_size = %u\n", vals_rems_size);
+
+ *buf = (char *)calloc(*buflen, sizeof(char));
+
+ if (!*buf)
+ {
+ *buflen = UINT_MAX;
+ return;
+ }
+
+ // dumping max_val, n and rem_r
+ memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("max_val = %u\n", cr->max_val);
+
+ memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("n = %u\n", cr->n);
+
+ memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("rem_r = %u\n", cr->rem_r);
+
+ // dumping sel
+ select_dump(&cr->sel, &buf_sel, &buflen_sel);
+ memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+ memcpy(*buf + pos, buf_sel, buflen_sel);
+
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ for(i = 0; i < buflen_sel; i++)
+ {
+ DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
+ }
+ #endif
+ pos += buflen_sel;
+
+ free(buf_sel);
+
+ // dumping vals_rems
+ memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
+ #ifdef DEBUG
+ for(i = 0; i < vals_rems_size; i++)
+ {
+ DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
+ }
+ #endif
+ pos += vals_rems_size;
+
+ DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
+}
+
+void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
+{
+ register cmph_uint32 pos = 0;
+ cmph_uint32 buflen_sel = 0;
+ register cmph_uint32 vals_rems_size = 0;
+
+ // loading max_val, n, and rem_r
+ memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("max_val = %u\n", cr->max_val);
+
+ memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("n = %u\n", cr->n);
+
+ memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("rem_r = %u\n", cr->rem_r);
+
+ // loading sel
+ memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
+ pos += (cmph_uint32)sizeof(cmph_uint32);
+ DEBUGP("buflen_sel = %u\n", buflen_sel);
+
+ select_load(&cr->sel, buf + pos, buflen_sel);
+ #ifdef DEBUG
+ cmph_uint32 i = 0;
+ for(i = 0; i < buflen_sel; i++)
+ {
+ DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
+ }
+ #endif
+ pos += buflen_sel;
+
+ // loading vals_rems
+ if(cr->vals_rems)
+ {
+ free(cr->vals_rems);
+ }
+ vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
+ cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
+ vals_rems_size *= 4;
+ memcpy(cr->vals_rems, buf + pos, vals_rems_size);
+
+ #ifdef DEBUG
+ for(i = 0; i < vals_rems_size; i++)
+ {
+ DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
+ }
+ #endif
+ pos += vals_rems_size;
+
+ DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
+}
+
+
+
+void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
+{
+ if (cr && cr_packed)
+ {
+ char *buf = NULL;
+ cmph_uint32 buflen = 0;
+ compressed_rank_dump(cr, &buf, &buflen);
+ memcpy(cr_packed, buf, buflen);
+ free(buf);
+ }
+}
+
+cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
+{
+ register cmph_uint32 sel_size = select_packed_size(&cr->sel);
+ register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
+ return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
+}
+
+cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
+{
+ // unpacking cr_packed
+ register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
+ register cmph_uint32 max_val = *ptr++;
+ register cmph_uint32 n = *ptr++;
+ register cmph_uint32 rem_r = *ptr++;
+ register cmph_uint32 buflen_sel = *ptr++;
+ register cmph_uint32 * sel_packed = ptr;
+
+ register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
+
+ register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2));
+
+ // compressed sequence query computation
+ register cmph_uint32 rems_mask;
+ register cmph_uint32 val_quot, val_rem;
+ register cmph_uint32 sel_res, rank;
+
+ if(idx > max_val)
+ {
+ return n;
+ }
+
+ val_quot = idx >> rem_r;
+ rems_mask = (1U << rem_r) - 1U;
+ val_rem = idx & rems_mask;
+ if(val_quot == 0)
+ {
+ rank = sel_res = 0;
+ }
+ else
+ {
+ sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
+ rank = sel_res - val_quot;
+ }
+
+ do
+ {
+ if(GETBIT32(bits_vec, sel_res))
+ {
+ break;
+ }
+ if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)
+ {
+ break;
+ }
+ sel_res++;
+ rank++;
+ } while(1);
+
+ return rank;
+}
+
+
+