322 lines
7.6 KiB
C
322 lines
7.6 KiB
C
#include<stdlib.h>
|
|
#include<stdio.h>
|
|
#include<limits.h>
|
|
#include<string.h>
|
|
#include"compressed_rank.h"
|
|
#include"bitbool.h"
|
|
// #define DEBUG
|
|
#include"debug.h"
|
|
static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
|
|
{
|
|
register cmph_uint32 res = 0;
|
|
|
|
while(x > 1)
|
|
{
|
|
x >>= 1;
|
|
res++;
|
|
}
|
|
return res;
|
|
};
|
|
|
|
void compressed_rank_init(compressed_rank_t * cr)
|
|
{
|
|
cr->max_val = 0;
|
|
cr->n = 0;
|
|
cr->rem_r = 0;
|
|
select_init(&cr->sel);
|
|
cr->vals_rems = 0;
|
|
}
|
|
|
|
void compressed_rank_destroy(compressed_rank_t * cr)
|
|
{
|
|
free(cr->vals_rems);
|
|
cr->vals_rems = 0;
|
|
select_destroy(&cr->sel);
|
|
}
|
|
|
|
void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
|
|
{
|
|
register cmph_uint32 i,j;
|
|
register cmph_uint32 rems_mask;
|
|
register cmph_uint32 * select_vec = 0;
|
|
cr->n = n;
|
|
cr->max_val = vals_table[cr->n - 1];
|
|
cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
|
|
if(cr->rem_r == 0)
|
|
{
|
|
cr->rem_r = 1;
|
|
}
|
|
select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
|
|
cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
|
|
rems_mask = (1U << cr->rem_r) - 1U;
|
|
|
|
for(i = 0; i < cr->n; i++)
|
|
{
|
|
set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
|
|
}
|
|
|
|
for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
|
|
{
|
|
while(i > (vals_table[j] >> cr->rem_r))
|
|
{
|
|
j++;
|
|
}
|
|
select_vec[i - 1] = j;
|
|
};
|
|
|
|
|
|
// FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
|
|
// I changed the select structure to work up to m, instead of up to m - 1.
|
|
select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
|
|
|
|
free(select_vec);
|
|
}
|
|
|
|
cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
|
|
{
|
|
register cmph_uint32 rems_mask;
|
|
register cmph_uint32 val_quot, val_rem;
|
|
register cmph_uint32 sel_res, rank;
|
|
|
|
if(idx > cr->max_val)
|
|
{
|
|
return cr->n;
|
|
}
|
|
|
|
val_quot = idx >> cr->rem_r;
|
|
rems_mask = (1U << cr->rem_r) - 1U;
|
|
val_rem = idx & rems_mask;
|
|
if(val_quot == 0)
|
|
{
|
|
rank = sel_res = 0;
|
|
}
|
|
else
|
|
{
|
|
sel_res = select_query(&cr->sel, val_quot - 1) + 1;
|
|
rank = sel_res - val_quot;
|
|
}
|
|
|
|
do
|
|
{
|
|
if(GETBIT32(cr->sel.bits_vec, sel_res))
|
|
{
|
|
break;
|
|
}
|
|
if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
|
|
{
|
|
break;
|
|
}
|
|
sel_res++;
|
|
rank++;
|
|
} while(1);
|
|
|
|
return rank;
|
|
}
|
|
|
|
cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
|
|
{
|
|
register cmph_uint32 space_usage = select_get_space_usage(&cr->sel);
|
|
space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
|
|
space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8;
|
|
return space_usage;
|
|
}
|
|
|
|
void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
|
|
{
|
|
register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
|
|
register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
|
|
register cmph_uint32 pos = 0;
|
|
char * buf_sel = 0;
|
|
cmph_uint32 buflen_sel = 0;
|
|
|
|
*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
|
|
|
|
DEBUGP("sel_size = %u\n", sel_size);
|
|
DEBUGP("vals_rems_size = %u\n", vals_rems_size);
|
|
|
|
*buf = (char *)calloc(*buflen, sizeof(char));
|
|
|
|
if (!*buf)
|
|
{
|
|
*buflen = UINT_MAX;
|
|
return;
|
|
}
|
|
|
|
// dumping max_val, n and rem_r
|
|
memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("max_val = %u\n", cr->max_val);
|
|
|
|
memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("n = %u\n", cr->n);
|
|
|
|
memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("rem_r = %u\n", cr->rem_r);
|
|
|
|
// dumping sel
|
|
select_dump(&cr->sel, &buf_sel, &buflen_sel);
|
|
memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("buflen_sel = %u\n", buflen_sel);
|
|
|
|
memcpy(*buf + pos, buf_sel, buflen_sel);
|
|
|
|
#ifdef DEBUG
|
|
cmph_uint32 i = 0;
|
|
for(i = 0; i < buflen_sel; i++)
|
|
{
|
|
DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
|
|
}
|
|
#endif
|
|
pos += buflen_sel;
|
|
|
|
free(buf_sel);
|
|
|
|
// dumping vals_rems
|
|
memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
|
|
#ifdef DEBUG
|
|
for(i = 0; i < vals_rems_size; i++)
|
|
{
|
|
DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
|
|
}
|
|
#endif
|
|
pos += vals_rems_size;
|
|
|
|
DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
|
|
}
|
|
|
|
void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
|
|
{
|
|
register cmph_uint32 pos = 0;
|
|
cmph_uint32 buflen_sel = 0;
|
|
register cmph_uint32 vals_rems_size = 0;
|
|
|
|
// loading max_val, n, and rem_r
|
|
memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("max_val = %u\n", cr->max_val);
|
|
|
|
memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("n = %u\n", cr->n);
|
|
|
|
memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("rem_r = %u\n", cr->rem_r);
|
|
|
|
// loading sel
|
|
memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
|
|
pos += (cmph_uint32)sizeof(cmph_uint32);
|
|
DEBUGP("buflen_sel = %u\n", buflen_sel);
|
|
|
|
select_load(&cr->sel, buf + pos, buflen_sel);
|
|
#ifdef DEBUG
|
|
cmph_uint32 i = 0;
|
|
for(i = 0; i < buflen_sel; i++)
|
|
{
|
|
DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
|
|
}
|
|
#endif
|
|
pos += buflen_sel;
|
|
|
|
// loading vals_rems
|
|
if(cr->vals_rems)
|
|
{
|
|
free(cr->vals_rems);
|
|
}
|
|
vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
|
|
cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
|
|
vals_rems_size *= 4;
|
|
memcpy(cr->vals_rems, buf + pos, vals_rems_size);
|
|
|
|
#ifdef DEBUG
|
|
for(i = 0; i < vals_rems_size; i++)
|
|
{
|
|
DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
|
|
}
|
|
#endif
|
|
pos += vals_rems_size;
|
|
|
|
DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
|
|
}
|
|
|
|
|
|
|
|
void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
|
|
{
|
|
if (cr && cr_packed)
|
|
{
|
|
char *buf = NULL;
|
|
cmph_uint32 buflen = 0;
|
|
compressed_rank_dump(cr, &buf, &buflen);
|
|
memcpy(cr_packed, buf, buflen);
|
|
free(buf);
|
|
}
|
|
}
|
|
|
|
cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
|
|
{
|
|
register cmph_uint32 sel_size = select_packed_size(&cr->sel);
|
|
register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
|
|
return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
|
|
}
|
|
|
|
cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
|
|
{
|
|
// unpacking cr_packed
|
|
register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
|
|
register cmph_uint32 max_val = *ptr++;
|
|
register cmph_uint32 n = *ptr++;
|
|
register cmph_uint32 rem_r = *ptr++;
|
|
register cmph_uint32 buflen_sel = *ptr++;
|
|
register cmph_uint32 * sel_packed = ptr;
|
|
|
|
register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
|
|
|
|
register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2));
|
|
|
|
// compressed sequence query computation
|
|
register cmph_uint32 rems_mask;
|
|
register cmph_uint32 val_quot, val_rem;
|
|
register cmph_uint32 sel_res, rank;
|
|
|
|
if(idx > max_val)
|
|
{
|
|
return n;
|
|
}
|
|
|
|
val_quot = idx >> rem_r;
|
|
rems_mask = (1U << rem_r) - 1U;
|
|
val_rem = idx & rems_mask;
|
|
if(val_quot == 0)
|
|
{
|
|
rank = sel_res = 0;
|
|
}
|
|
else
|
|
{
|
|
sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
|
|
rank = sel_res - val_quot;
|
|
}
|
|
|
|
do
|
|
{
|
|
if(GETBIT32(bits_vec, sel_res))
|
|
{
|
|
break;
|
|
}
|
|
if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)
|
|
{
|
|
break;
|
|
}
|
|
sel_res++;
|
|
rank++;
|
|
} while(1);
|
|
|
|
return rank;
|
|
}
|
|
|
|
|
|
|