*** empty log message ***
This commit is contained in:
parent
7e265956c9
commit
417c7fb458
@ -60,7 +60,7 @@ libcmph_la_LIBADD =
|
||||
am_libcmph_la_OBJECTS = hash.lo jenkins_hash.lo vstack.lo vqueue.lo \
|
||||
graph.lo cmph.lo cmph_structs.lo chm.lo bmz.lo bmz8.lo bdz.lo \
|
||||
bdz_ph.lo buffer_manager.lo buffer_entry.lo brz.lo fch.lo \
|
||||
fch_buckets.lo select.lo
|
||||
fch_buckets.lo select.lo compressed_seq.lo
|
||||
libcmph_la_OBJECTS = $(am_libcmph_la_OBJECTS)
|
||||
libcmph_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
|
||||
@ -207,7 +207,7 @@ libcmph_la_SOURCES = hash.c jenkins_hash.c\
|
||||
chm.c bmz.c bmz8.c bdz.c bdz_ph.c\
|
||||
buffer_manager.c buffer_entry.c\
|
||||
brz.c fch.c fch_buckets.c \
|
||||
select.c
|
||||
select.c compressed_seq.c
|
||||
|
||||
libcmph_la_LDFLAGS = -version-info 0:0:0
|
||||
cmph_SOURCES = main.c wingetopt.h wingetopt.c
|
||||
@ -322,6 +322,7 @@ include ./$(DEPDIR)/buffer_manager.Plo
|
||||
include ./$(DEPDIR)/chm.Plo
|
||||
include ./$(DEPDIR)/cmph.Plo
|
||||
include ./$(DEPDIR)/cmph_structs.Plo
|
||||
include ./$(DEPDIR)/compressed_seq.Plo
|
||||
include ./$(DEPDIR)/fch.Plo
|
||||
include ./$(DEPDIR)/fch_buckets.Plo
|
||||
include ./$(DEPDIR)/graph.Plo
|
||||
|
@ -7,7 +7,7 @@ libcmph_la_SOURCES = hash.c jenkins_hash.c\
|
||||
chm.c bmz.c bmz8.c bdz.c bdz_ph.c\
|
||||
buffer_manager.c buffer_entry.c\
|
||||
brz.c fch.c fch_buckets.c \
|
||||
select.c
|
||||
select.c compressed_seq.c
|
||||
|
||||
libcmph_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
|
@ -60,7 +60,7 @@ libcmph_la_LIBADD =
|
||||
am_libcmph_la_OBJECTS = hash.lo jenkins_hash.lo vstack.lo vqueue.lo \
|
||||
graph.lo cmph.lo cmph_structs.lo chm.lo bmz.lo bmz8.lo bdz.lo \
|
||||
bdz_ph.lo buffer_manager.lo buffer_entry.lo brz.lo fch.lo \
|
||||
fch_buckets.lo select.lo
|
||||
fch_buckets.lo select.lo compressed_seq.lo
|
||||
libcmph_la_OBJECTS = $(am_libcmph_la_OBJECTS)
|
||||
libcmph_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
|
||||
@ -207,7 +207,7 @@ libcmph_la_SOURCES = hash.c jenkins_hash.c\
|
||||
chm.c bmz.c bmz8.c bdz.c bdz_ph.c\
|
||||
buffer_manager.c buffer_entry.c\
|
||||
brz.c fch.c fch_buckets.c \
|
||||
select.c
|
||||
select.c compressed_seq.c
|
||||
|
||||
libcmph_la_LDFLAGS = -version-info 0:0:0
|
||||
cmph_SOURCES = main.c wingetopt.h wingetopt.c
|
||||
@ -322,6 +322,7 @@ distclean-compile:
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chm.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmph.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmph_structs.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compressed_seq.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fch.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fch_buckets.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/graph.Plo@am__quote@
|
||||
|
@ -2,10 +2,15 @@
|
||||
#define _CMPH_BITBOOL_H__
|
||||
#include "cmph_types.h"
|
||||
|
||||
static const cmph_uint8 bitmask[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
||||
static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
static const cmph_uint8 bitmask[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
||||
|
||||
// extern const cmph_uint8 bitmask[];
|
||||
// bitmask32[k] has only bit k set (k = 0..31). It is indexed by SETBIT32/GETBIT32 with (i & 0x1f).
// The constants are unsigned: with a 32-bit int, 1 << 31 would overflow a signed int.
static const cmph_uint32 bitmask32[] = { 1U,       1U << 1,  1U << 2,  1U << 3,  1U << 4,  1U << 5,  1U << 6,  1U << 7,
                                         1U << 8,  1U << 9,  1U << 10, 1U << 11, 1U << 12, 1U << 13, 1U << 14, 1U << 15,
                                         1U << 16, 1U << 17, 1U << 18, 1U << 19, 1U << 20, 1U << 21, 1U << 22, 1U << 23,
                                         1U << 24, 1U << 25, 1U << 26, 1U << 27, 1U << 28, 1U << 29, 1U << 30, 1U << 31
                                       };
|
||||
|
||||
static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
|
||||
|
||||
/** \def GETBIT(array, i)
|
||||
@ -39,7 +44,6 @@ static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
//#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
||||
//#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
||||
|
||||
extern const cmph_uint8 valuemask[];
|
||||
|
||||
/** \def SETVALUE1(array, i, v)
|
||||
* \brief set a value for a 2-bit integer stored in an array initialized with 1s.
|
||||
@ -76,4 +80,91 @@ extern const cmph_uint8 valuemask[];
|
||||
// Reads the 2-bit value number i from a byte array (four values per byte, lowest bits first).
// Both arguments are parenthesized so that expression arguments expand correctly; i is evaluated twice.
#define GETVALUE(array, i) (((array)[(i) >> 2] >> (((i) & 0x00000003) << 1)) & 0x00000003)
|
||||
|
||||
|
||||
|
||||
/** \def SETBIT32(array, i)
|
||||
* \brief set 1 to an 1-bit integer stored in an array of 32-bit words.
|
||||
* \param array to store 1-bit integer values. The entries are 32-bit words.
|
||||
* \param i is the index in array of the bit to set to 1
|
||||
*
|
||||
* SETBIT32(array, i) is a macro that sets 1 to an 1-bit integer stored in an array of 32-bit words.
|
||||
*/
|
||||
// Arguments are parenthesized so that expression arguments expand correctly; i is evaluated twice.
#define SETBIT32(array, i) ((array)[(i) >> 5] |= bitmask32[(i) & 0x0000001f])
|
||||
|
||||
/** \def GETBIT32(array, i)
|
||||
* \brief get the value of an 1-bit integer stored in an array of 32-bit words.
|
||||
* \param array to get 1-bit integer values from. The entries are 32-bit words.
|
||||
* \param i is the index in array to get the 1-bit integer value from
|
||||
*
|
||||
* GETBIT32(array, i) is a macro that gets the value of an 1-bit integer stored in an array of 32-bit words.
|
||||
*/
|
||||
// Yields a non-zero value (the masked bit itself, not necessarily 1) when bit i is set.
// Arguments are parenthesized so that expression arguments expand correctly; i is evaluated twice.
#define GETBIT32(array, i) ((array)[(i) >> 5] & bitmask32[(i) & 0x0000001f])
|
||||
|
||||
// Number of 32-bit words needed to hold n entries of bits_length bits each (rounded up).
// Arguments are parenthesized so that expression arguments such as (a + b) multiply as a whole.
#define BITS_TABLE_SIZE(n, bits_length) ((((n) * (bits_length)) + 31) >> 5)
|
||||
|
||||
// Stores bits_string into entry number `index` of a packed table whose entries are all
// string_length bits wide. string_mask is the caller-supplied mask for one entry.
// An entry that crosses a 32-bit word boundary is split between two consecutive words.
static inline void set_bits_value(cmph_uint32 * bits_table, cmph_uint32 index, cmph_uint32 bits_string,
                                  cmph_uint32 string_length, cmph_uint32 string_mask)
{
	const cmph_uint32 first_bit = index * string_length;
	cmph_uint32 * const word = bits_table + (first_bit >> 5);
	const cmph_uint32 offset = first_bit & 0x0000001f; // bit offset inside the first word
	const cmph_uint32 room = 32 - offset;              // bits available in the first word

	// clear the target bits, then write the low part of the entry
	word[0] = (word[0] & ~(string_mask << offset)) | (bits_string << offset);

	if(room < string_length)
	{
		// the entry spills over: write its high part into the next word
		word[1] = (word[1] & ~(string_mask >> room)) | (bits_string >> room);
	}
}
|
||||
|
||||
// Returns entry number `index` of a packed table whose entries are all string_length bits wide.
// string_mask is the caller-supplied mask for one entry. An entry that crosses a 32-bit word
// boundary is reassembled from two consecutive words.
static inline cmph_uint32 get_bits_value(cmph_uint32 * bits_table,cmph_uint32 index, cmph_uint32 string_length, cmph_uint32 string_mask)
{
	const cmph_uint32 first_bit = index * string_length;
	const cmph_uint32 * const word = bits_table + (first_bit >> 5);
	const cmph_uint32 offset = first_bit & 0x0000001f; // bit offset inside the first word
	const cmph_uint32 room = 32 - offset;              // bits available in the first word
	cmph_uint32 value = (word[0] >> offset) & string_mask;

	if(room < string_length)
	{
		// the high part of the entry lives in the next word
		value |= (word[1] << room) & string_mask;
	}

	return value;
}
|
||||
|
||||
// Writes the string_length low bits of bits_string into bits_table starting at bit position pos.
// A string that crosses a 32-bit word boundary is split between two consecutive words.
// bits_string is expected to fit in string_length bits: it is OR-ed in without being masked.
static inline void set_bits_at_pos(cmph_uint32 * bits_table, cmph_uint32 pos, cmph_uint32 bits_string, cmph_uint32 string_length)
{
	cmph_uint32 word_idx = pos >> 5;
	cmph_uint32 shift1 = pos & 0x0000001f;
	cmph_uint32 shift2 = 32 - shift1;
	// Build the mask with unsigned arithmetic: a signed 1 << 31 overflows, and shifting by 32 is undefined.
	cmph_uint32 string_mask = (string_length < 32) ? (((cmph_uint32)1 << string_length) - 1) : (cmph_uint32)0xffffffff;

	bits_table[word_idx] &= ~(string_mask << shift1);
	bits_table[word_idx] |= bits_string << shift1;

	// shift2 < string_length implies shift2 < 32, so the shifts below stay in range
	if(shift2 < string_length)
	{
		bits_table[word_idx+1] &= ~(string_mask >> shift2);
		bits_table[word_idx+1] |= bits_string >> shift2;
	}
}
|
||||
|
||||
// Returns the string_length bits stored in bits_table starting at bit position pos.
// A string that crosses a 32-bit word boundary is reassembled from two consecutive words.
static inline cmph_uint32 get_bits_at_pos(cmph_uint32 * bits_table,cmph_uint32 pos,cmph_uint32 string_length)
{
	cmph_uint32 word_idx = pos >> 5;
	cmph_uint32 shift1 = pos & 0x0000001f;
	cmph_uint32 shift2 = 32 - shift1;
	// Build the mask with unsigned arithmetic: a signed 1 << 31 overflows, and shifting by 32 is undefined.
	cmph_uint32 string_mask = (string_length < 32) ? (((cmph_uint32)1 << string_length) - 1) : (cmph_uint32)0xffffffff;
	cmph_uint32 bits_string;

	bits_string = (bits_table[word_idx] >> shift1) & string_mask;

	// shift2 < string_length implies shift2 < 32, so the shift below stays in range
	if(shift2 < string_length)
	{
		bits_string |= (bits_table[word_idx+1] << shift2) & string_mask;
	}
	return bits_string;
}
|
||||
|
||||
|
||||
#endif
|
||||
|
373
src/compressed_seq.c
Normal file
373
src/compressed_seq.c
Normal file
@ -0,0 +1,373 @@
|
||||
#include "compressed_seq.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "bitbool.h"
|
||||
|
||||
// #define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
// Integer base-2 logarithm rounded down: counts how many times x can be halved
// before it drops to 1 or below. Returns 0 for x == 0 and x == 1.
static inline cmph_uint32 i_log2(cmph_uint32 x)
{
	cmph_uint32 exponent;

	for(exponent = 0; x > 1; x >>= 1)
	{
		++exponent;
	}
	return exponent;
}
|
||||
|
||||
// Puts cs into the empty state: no values, no tables allocated, select structure initialized.
void compressed_seq_init(compressed_seq_t * cs)
{
	select_init(&cs->sel);

	// counters
	cs->n = 0;
	cs->rem_r = 0;
	cs->total_length = 0;

	// tables are allocated later by compressed_seq_generate or compressed_seq_load
	cs->length_rems = NULL;
	cs->store_table = NULL;
}
|
||||
|
||||
// Releases the two tables owned by cs and destroys its select structure.
// The table pointers are cleared so a later free of the same pointer is harmless.
void compressed_seq_destroy(compressed_seq_t * cs)
{
	free(cs->store_table);
	free(cs->length_rems);
	cs->store_table = NULL;
	cs->length_rems = NULL;

	select_destroy(&cs->sel);
}
|
||||
|
||||
|
||||
// Builds the compressed representation of the n values in vals_table.
//
// Encoding: a value v > 0 is stored as the i_log2(v + 1) low bits of v - (2^len - 1); a value of 0
// takes no bits. All encoded values are concatenated in store_table. The cumulative end position of
// each value is split in two: its rem_r low bits go to length_rems, the remaining high bits are
// handed to the select structure.
//
// cs must have been initialized (compressed_seq_init); tables from a previous generate/load are freed.
// If n == 0 or an allocation fails, cs is left empty (n == 0, both tables NULL) and the select
// structure is not touched.
// NOTE(review): a value of 0xffffffff cannot be encoded (v + 1 wraps to 0) -- callers presumably
// never pass it; confirm.
void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
{
	cmph_uint32 i;
	cmph_uint32 rems_mask;
	cmph_uint32 stored_value;
	cmph_uint32 store_words;
	cmph_uint32 rems_words;
	// lengths: first the length of each encoded value, later reused as the keys given to select
	cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));

	// n == 0 would divide by zero when the average length is computed below
	if(n == 0 || lengths == NULL)
	{
		goto reset;
	}

	cs->n = n;
	cs->total_length = 0;

	// first pass: compute the encoded length of every value (calloc already left zeros for v == 0)
	for(i = 0; i < cs->n; i++)
	{
		if(vals_table[i] != 0)
		{
			lengths[i] = i_log2(vals_table[i] + 1);
			cs->total_length += lengths[i];
		}
	}

	free(cs->store_table);
	store_words = (cs->total_length + 31) >> 5;
	cs->store_table = (cmph_uint32 *) calloc(store_words, sizeof(cmph_uint32));
	// store_words is 0 when every value is 0; calloc may then legitimately return NULL
	if(cs->store_table == NULL && store_words != 0)
	{
		goto reset;
	}

	// second pass: write the encoded values back to back
	cs->total_length = 0;
	for(i = 0; i < cs->n; i++)
	{
		if(vals_table[i] == 0)
			continue;
		// lengths[i] <= 31 here, so the unsigned shift is always in range
		stored_value = vals_table[i] - (((cmph_uint32)1 << lengths[i]) - 1);
		set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
		cs->total_length += lengths[i];
	}

	cs->rem_r = i_log2(cs->total_length/cs->n);
	// With rem_r == 0 the length_rems table would have zero words, yet set_bits_value and
	// get_bits_value always touch word 0. Keep at least one remainder bit per entry.
	if(cs->rem_r == 0)
	{
		cs->rem_r = 1;
	}

	free(cs->length_rems);
	rems_words = BITS_TABLE_SIZE(cs->n, cs->rem_r);
	cs->length_rems = (cmph_uint32 *) calloc(rems_words, sizeof(cmph_uint32));
	if(cs->length_rems == NULL)
	{
		goto reset;
	}

	rems_mask = ((cmph_uint32)1 << cs->rem_r) - 1;
	cs->total_length = 0;

	// third pass: split each cumulative end position into low bits (length_rems) and high bits (select key)
	for(i = 0; i < cs->n; i++)
	{
		cs->total_length += lengths[i];
		set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
		lengths[i] = cs->total_length >> cs->rem_r;
	}

	// select_init is deliberately not called here: it would overwrite (and leak) the arrays of a
	// previously generated select structure, which select_generate frees by itself.

	// FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
	// I changed the select structure to work up to m, instead of up to m - 1.
	select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));

	free(lengths);
	return;

reset:
	free(lengths);
	free(cs->store_table);
	cs->store_table = NULL;
	free(cs->length_rems);
	cs->length_rems = NULL;
	cs->n = 0;
	cs->rem_r = 0;
	cs->total_length = 0;
}
|
||||
|
||||
// Returns the size of cs in bits: four 32-bit header words, the select structure,
// the store_table words and the length_rems words.
cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
{
	const cmph_uint32 bits_per_word = (cmph_uint32)(sizeof(cmph_uint32) * 8);
	const cmph_uint32 header_bits = 4 * bits_per_word;
	const cmph_uint32 select_bits = select_get_space_usage(&cs->sel);
	const cmph_uint32 store_bits = ((cs->total_length + 31) >> 5) * bits_per_word;
	const cmph_uint32 rems_bits = BITS_TABLE_SIZE(cs->n, cs->rem_r) * bits_per_word;

	return header_bits + select_bits + store_bits + rems_bits;
}
|
||||
|
||||
// Returns the value stored at position idx (idx must be < cs->n).
//
// The start and end bit positions of the encoded value are rebuilt from the select structure
// (high bits) and length_rems (rem_r low bits); the value is then read from store_table and the
// offset 2^len - 1 removed at encoding time is added back. A zero-length encoding means value 0.
cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
{
	cmph_uint32 enc_idx, enc_length;
	cmph_uint32 rems_mask;
	cmph_uint32 stored_value;
	cmph_uint32 sel_res;

	assert(idx < cs->n); // FABIANO ADDED

	rems_mask = ((cmph_uint32)1 << cs->rem_r) - 1;

	if(idx == 0)
	{
		// the first value starts at bit 0
		enc_idx = 0;
		sel_res = select_query(&cs->sel, idx);
	}
	else
	{
		sel_res = select_query(&cs->sel, idx - 1);

		// start of value idx == end of value idx - 1
		enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
		enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);

		sel_res = select_next_query(&cs->sel, sel_res);
	}

	enc_length = (sel_res - idx) << cs->rem_r;
	enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
	enc_length -= enc_idx;
	if(enc_length == 0)
		return 0;

	stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
	// unsigned shift: a signed 1 << 31 would overflow for the longest encodings
	return stored_value + (((cmph_uint32)1 << enc_length) - 1);
}
|
||||
|
||||
// Serializes cs into a newly allocated buffer returned through *buf (the caller frees it);
// *buflen receives its size in bytes.
//
// Layout: n, rem_r, total_length, length of the select dump (one cmph_uint32 each),
// the select dump, the length_rems words, the store_table words.
//
// On failure *buf is NULL and *buflen is UINT_MAX.
void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
{
	cmph_uint32 sel_size;
	cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
	cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
	cmph_uint32 pos = 0;
	char * buf_sel = NULL;
	cmph_uint32 buflen_sel = 0;

	// Dump the select structure first so the output buffer is sized from the number of bytes
	// that will really be copied, not from a separate estimate.
	select_dump(&cs->sel, &buf_sel, &buflen_sel);
	if (!buf_sel)
	{
		// NOTE(review): assumes select_dump leaves its buffer NULL on failure -- confirm
		*buf = NULL;
		*buflen = UINT_MAX;
		return;
	}
	sel_size = buflen_sel;

	*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;

	DEBUGP("sel_size = %u\n", sel_size);
	DEBUGP("length_rems_size = %u\n", length_rems_size);
	DEBUGP("store_table_size = %u\n", store_table_size);
	*buf = (char *)calloc(*buflen, sizeof(char));

	if (!*buf)
	{
		free(buf_sel);
		*buflen = UINT_MAX;
		return;
	}

	// dumping n, rem_r and total_length
	memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cs->n);

	memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cs->rem_r);

	memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("total_length = %u\n", cs->total_length);

	// dumping sel
	memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	memcpy(*buf + pos, buf_sel, buflen_sel);
	#ifdef DEBUG
	cmph_uint32 i = 0;
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
	}
	#endif
	pos += buflen_sel;

	free(buf_sel);

	// dumping length_rems (the table pointer may be NULL when there is nothing to copy)
	if (length_rems_size != 0)
	{
		memcpy(*buf + pos, cs->length_rems, length_rems_size);
	}
	#ifdef DEBUG
	for(i = 0; i < length_rems_size; i++)
	{
		DEBUGP("pos = %u -- length_rems_size = %u  -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
	}
	#endif
	pos += length_rems_size;

	// dumping store_table (the table pointer may be NULL when there is nothing to copy)
	if (store_table_size != 0)
	{
		memcpy(*buf + pos, cs->store_table, store_table_size);
	}

	#ifdef DEBUG
	for(i = 0; i < store_table_size; i++)
	{
		DEBUGP("pos = %u -- store_table_size = %u  -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
	}
	#endif
	DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
}
|
||||
|
||||
// Rebuilds cs from a buffer produced by compressed_seq_dump.
//
// Reads n, rem_r, total_length and the select dump length, loads the select structure, then
// copies the length_rems and store_table words into freshly allocated tables (tables already
// owned by cs are freed first, so cs must have been initialized).
// If a table cannot be allocated, cs is left with n == 0 and both tables NULL.
// NOTE(review): buflen is only used for the debug message; reads are not bounds-checked against it.
void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
{
	cmph_uint32 pos = 0;
	cmph_uint32 buflen_sel = 0;
	cmph_uint32 length_rems_size = 0;
	cmph_uint32 store_table_size = 0;

	// loading n, rem_r and total_length
	memcpy(&(cs->n), buf, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cs->n);

	memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cs->rem_r);

	memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("total_length = %u\n", cs->total_length);

	// loading sel
	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	select_load(&cs->sel, buf + pos, buflen_sel);
	#ifdef DEBUG
	cmph_uint32 i = 0;
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
	}
	#endif
	pos += buflen_sel;

	// loading length_rems
	free(cs->length_rems);
	length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r);
	cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
	// a zero-sized request may legitimately yield NULL, so only a non-empty table counts as failure
	if(cs->length_rems == NULL && length_rems_size != 0)
	{
		goto alloc_failed;
	}
	length_rems_size *= 4;
	if(length_rems_size != 0)
	{
		memcpy(cs->length_rems, buf + pos, length_rems_size);
	}

	#ifdef DEBUG
	for(i = 0; i < length_rems_size; i++)
	{
		DEBUGP("pos = %u -- length_rems_size = %u  -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
	}
	#endif
	pos += length_rems_size;

	// loading store_table
	store_table_size = ((cs->total_length + 31) >> 5);
	free(cs->store_table);
	cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
	if(cs->store_table == NULL && store_table_size != 0)
	{
		goto alloc_failed;
	}
	store_table_size *= 4;
	if(store_table_size != 0)
	{
		memcpy(cs->store_table, buf + pos, store_table_size);
	}

	#ifdef DEBUG
	for(i = 0; i < store_table_size; i++)
	{
		DEBUGP("pos = %u -- store_table_size = %u  -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
	}
	#endif

	DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
	return;

alloc_failed:
	// out of memory: drop whatever was loaded so that no table is half-initialized
	free(cs->length_rems);
	cs->length_rems = NULL;
	free(cs->store_table);
	cs->store_table = NULL;
	cs->n = 0;
	cs->rem_r = 0;
	cs->total_length = 0;
}
|
||||
|
||||
// Copies the serialized form of cs (as produced by compressed_seq_dump) into the preallocated
// area cs_packed. Does nothing when either pointer is NULL or when the dump fails.
void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
{
	char *buf = NULL;
	cmph_uint32 buflen = 0;

	if (!cs || !cs_packed)
	{
		return;
	}

	compressed_seq_dump(cs, &buf, &buflen);
	if (!buf)
	{
		// a failed dump reports buflen == UINT_MAX with a NULL buffer; copying would crash
		return;
	}

	memcpy(cs_packed, buf, buflen);
	free(buf);
}
|
||||
|
||||
// Returns the number of bytes needed to pack cs: four cmph_uint32 header words,
// the packed select structure, the store_table words and the length_rems words.
cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
{
	const cmph_uint32 word_bytes = (cmph_uint32)sizeof(cmph_uint32);
	const cmph_uint32 header_bytes = 4 * word_bytes;
	const cmph_uint32 select_bytes = select_packed_size(&cs->sel);
	const cmph_uint32 store_bytes = ((cs->total_length + 31) >> 5) * word_bytes;
	const cmph_uint32 rems_bytes = BITS_TABLE_SIZE(cs->n, cs->rem_r) * word_bytes;

	return header_bytes + select_bytes + store_bytes + rems_bytes;
}
|
||||
|
||||
|
||||
// Same lookup as compressed_seq_query, performed directly on a packed buffer
// (layout: n, rem_r, total_length, select dump length, select dump, length_rems, store_table).
// idx must be < n.
// NOTE(review): cs_packed is read through a cmph_uint32 pointer, so it presumably has to be
// suitably aligned and the select dump length a multiple of 4 -- confirm with callers.
cmph_int32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
{
	// unpacking cs_packed
	cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
	cmph_uint32 n = *ptr++;
	cmph_uint32 rem_r = *ptr++;
	cmph_uint32 buflen_sel;
	cmph_uint32 * sel_packed;
	cmph_uint32 * length_rems;
	cmph_uint32 * store_table;

	// compressed sequence query computation
	cmph_uint32 enc_idx, enc_length;
	cmph_uint32 rems_mask;
	cmph_uint32 stored_value;
	cmph_uint32 sel_res;

	ptr++; // skipping total_length
	buflen_sel = *ptr++;
	sel_packed = ptr;
	length_rems = sel_packed + (buflen_sel >> 2); // buflen_sel is in bytes
	store_table = length_rems + BITS_TABLE_SIZE(n, rem_r);

	assert(idx < n); // same precondition as compressed_seq_query

	rems_mask = ((cmph_uint32)1 << rem_r) - 1;

	if(idx == 0)
	{
		// the first value starts at bit 0
		enc_idx = 0;
		sel_res = select_query_packed(sel_packed, idx);
	}
	else
	{
		sel_res = select_query_packed(sel_packed, idx - 1);

		// start of value idx == end of value idx - 1
		enc_idx = (sel_res - (idx - 1)) << rem_r;
		enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);

		sel_res = select_next_query_packed(sel_packed, sel_res);
	}

	enc_length = (sel_res - idx) << rem_r;
	enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
	enc_length -= enc_idx;
	if(enc_length == 0)
		return 0;

	stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
	// unsigned shift: a signed 1 << 31 would overflow for the longest encodings
	return stored_value + (((cmph_uint32)1 << enc_length) - 1);
}
|
84
src/compressed_seq.h
Normal file
84
src/compressed_seq.h
Normal file
@ -0,0 +1,84 @@
|
||||
#ifndef __CMPH_COMPRESSED_SEQ_H__
|
||||
#define __CMPH_COMPRESSED_SEQ_H__
|
||||
|
||||
#include"select.h"
|
||||
|
||||
struct _compressed_seq_t
|
||||
{
|
||||
cmph_uint32 n; // number of values stored in store_table
|
||||
// The length in bits of each value is decomposed into two compnents: the lg(n) MSBs are stored in rank_select data structure
|
||||
// the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
|
||||
cmph_uint32 rem_r;
|
||||
cmph_uint32 total_length; // total length in bits of stored_table
|
||||
select_t sel;
|
||||
cmph_uint32 * length_rems;
|
||||
cmph_uint32 * store_table;
|
||||
};
|
||||
|
||||
typedef struct _compressed_seq_t compressed_seq_t;
|
||||
|
||||
/** \fn void compressed_seq_init(compressed_seq_t * cs);
|
||||
* \brief Initialize a compressed sequence structure.
|
||||
* \param cs points to the compressed sequence structure to be initialized
|
||||
*/
|
||||
void compressed_seq_init(compressed_seq_t * cs);
|
||||
|
||||
/** \fn void compressed_seq_destroy(compressed_seq_t * cs);
|
||||
* \brief Destroy a compressed sequence given as input.
|
||||
* \param cs points to the compressed sequence structure to be destroyed
|
||||
*/
|
||||
void compressed_seq_destroy(compressed_seq_t * cs);
|
||||
|
||||
/** \fn void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||
* \brief Generate a compressed sequence from an input array with n values.
|
||||
* \param cs points to the compressed sequence structure
|
||||
* \param vals_table pointer to the array given as input
|
||||
* \param n number of values in @see vals_table
|
||||
*/
|
||||
void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||
|
||||
|
||||
/** \fn cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||
* \brief Returns the value stored at index @see idx of the compressed sequence structure.
|
||||
* \param cs points to the compressed sequence structure
|
||||
* \param idx index to retrieve the value from
|
||||
* \return the value stored at index @see idx of the compressed sequence structure
|
||||
*/
|
||||
cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||
|
||||
|
||||
/** \fn cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
|
||||
* \brief Returns amount of space (in bits) to store the compressed sequence.
|
||||
* \param cs points to the compressed sequence structure
|
||||
* \return the amount of space (in bits) to store @see cs
|
||||
*/
|
||||
cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
|
||||
|
||||
void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen);
|
||||
|
||||
void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen);
|
||||
|
||||
|
||||
/** \fn void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
|
||||
* \brief Support the ability to pack a compressed sequence structure into a preallocated contiguous memory space pointed by cs_packed.
|
||||
* \param cs points to the compressed sequence structure
|
||||
* \param cs_packed pointer to the contiguous memory area used to store the compressed sequence structure. The size of cs_packed must be at least @see compressed_seq_packed_size
|
||||
*/
|
||||
void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
|
||||
|
||||
/** \fn cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
|
||||
* \brief Return the amount of space needed to pack a compressed sequence structure.
|
||||
* \return the size of the packed compressed sequence structure or zero for failures
|
||||
*/
|
||||
cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
|
||||
|
||||
|
||||
/** \fn cmph_int32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||
* \brief Returns the value stored at index @see idx of the packed compressed sequence structure.
|
||||
* \param cs_packed is a pointer to a contiguous memory area
|
||||
* \param idx is the index to retrieve the value from
|
||||
* \return the value stored at index @see idx of the packed compressed sequence structure
|
||||
*/
|
||||
cmph_int32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||
|
||||
#endif
|
80
src/select.c
80
src/select.c
@ -32,33 +32,23 @@ static inline void select_insert_1(cmph_uint32 * buffer)
|
||||
(*buffer) |= 0x80000000;
|
||||
};
|
||||
|
||||
void select_init(select_t * sel, cmph_uint32 n, cmph_uint32 m)
|
||||
void select_init(select_t * sel)
|
||||
{
|
||||
register cmph_uint32 nbits;
|
||||
register cmph_uint32 vec_size;
|
||||
register cmph_uint32 sel_table_size;
|
||||
sel->n = n;
|
||||
sel->m = m; // n values in the range [0,m-1]
|
||||
|
||||
nbits = sel->n + sel->m;
|
||||
vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
|
||||
sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
|
||||
sel->bits_vec = (cmph_uint32 *)calloc(vec_size, sizeof(cmph_uint32));
|
||||
|
||||
sel->select_table = (cmph_uint32 *)calloc(sel_table_size, sizeof(cmph_uint32));
|
||||
sel->n = 0;
|
||||
sel->m = 0;
|
||||
sel->bits_vec = 0;
|
||||
sel->select_table = 0;
|
||||
};
|
||||
|
||||
double select_get_space_usage(select_t * sel)
|
||||
cmph_uint32 select_get_space_usage(select_t * sel)
|
||||
{
|
||||
register cmph_uint32 nbits;
|
||||
register cmph_uint32 vec_size;
|
||||
register cmph_uint32 sel_table_size;
|
||||
register double space_usage;
|
||||
register cmph_uint32 space_usage;
|
||||
|
||||
nbits = sel->n + sel->m;
|
||||
vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
vec_size = (nbits + 31) >> 5;
|
||||
sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
|
||||
space_usage = 2 * sizeof(cmph_uint32) * 8; // n and m
|
||||
@ -101,11 +91,36 @@ static inline void select_generate_sel_table(select_t * sel)
|
||||
};
|
||||
};
|
||||
|
||||
void select_generate(select_t * sel, cmph_uint32 * keys_vec)
|
||||
void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m)
|
||||
{
|
||||
register cmph_uint32 i, j, idx;
|
||||
cmph_uint32 buffer = 0;
|
||||
|
||||
register cmph_uint32 nbits;
|
||||
register cmph_uint32 vec_size;
|
||||
register cmph_uint32 sel_table_size;
|
||||
sel->n = n;
|
||||
sel->m = m; // n values in the range [0,m-1]
|
||||
|
||||
nbits = sel->n + sel->m;
|
||||
vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
|
||||
sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
|
||||
if(sel->bits_vec)
|
||||
{
|
||||
free(sel->bits_vec);
|
||||
}
|
||||
sel->bits_vec = (cmph_uint32 *)calloc(vec_size, sizeof(cmph_uint32));
|
||||
|
||||
if(sel->select_table)
|
||||
{
|
||||
free(sel->select_table);
|
||||
}
|
||||
sel->select_table = (cmph_uint32 *)calloc(sel_table_size, sizeof(cmph_uint32));
|
||||
|
||||
|
||||
|
||||
idx = i = j = 0;
|
||||
|
||||
for(;;)
|
||||
@ -204,12 +219,11 @@ cmph_int32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx)
|
||||
return _select_next_query((cmph_uint8 *)sel->bits_vec, vec_bit_idx);
|
||||
};
|
||||
|
||||
|
||||
void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen)
|
||||
{
|
||||
register cmph_uint32 nbits = sel->n + sel->m;
|
||||
register cmph_uint32 vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
register cmph_uint32 sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
register cmph_uint32 vec_size = ((nbits + 31) >> 5) * sizeof(cmph_uint32); // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
register cmph_uint32 sel_table_size = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * sizeof(cmph_uint32); // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
register cmph_uint32 pos = 0;
|
||||
|
||||
*buflen = 2*sizeof(cmph_uint32) + vec_size + sel_table_size;
|
||||
@ -246,14 +260,20 @@ void select_load(select_t * sel, const char *buf, cmph_uint32 buflen)
|
||||
pos += sizeof(cmph_uint32);
|
||||
|
||||
nbits = sel->n + sel->m;
|
||||
vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
vec_size = ((nbits + 31) >> 5) * sizeof(cmph_uint32); // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
sel_table_size = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * sizeof(cmph_uint32); // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
|
||||
if(sel->bits_vec) free(sel->bits_vec);
|
||||
sel->bits_vec = (cmph_uint32 *)calloc(vec_size, sizeof(cmph_uint32));
|
||||
if(sel->bits_vec)
|
||||
{
|
||||
free(sel->bits_vec);
|
||||
}
|
||||
sel->bits_vec = (cmph_uint32 *)calloc(vec_size/sizeof(cmph_uint32), sizeof(cmph_uint32));
|
||||
|
||||
if(sel->select_table) free(sel->select_table);
|
||||
sel->select_table = (cmph_uint32 *)calloc(sel_table_size, sizeof(cmph_uint32));
|
||||
if(sel->select_table)
|
||||
{
|
||||
free(sel->select_table);
|
||||
}
|
||||
sel->select_table = (cmph_uint32 *)calloc(sel_table_size/sizeof(cmph_uint32), sizeof(cmph_uint32));
|
||||
|
||||
memcpy(sel->bits_vec, buf + pos, vec_size);
|
||||
pos += vec_size;
|
||||
@ -288,8 +308,8 @@ void select_pack(select_t *sel, void *sel_packed)
|
||||
cmph_uint32 select_packed_size(select_t *sel)
|
||||
{
|
||||
register cmph_uint32 nbits = sel->n + sel->m;
|
||||
register cmph_uint32 vec_size = (nbits + 31) >> 5; // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
register cmph_uint32 sel_table_size = (sel->n >> NBITS_STEP_SELECT_TABLE) + 1; // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
register cmph_uint32 vec_size = ((nbits + 31) >> 5) * sizeof(cmph_uint32); // (nbits + 31) >> 5 = (nbits + 31)/32
|
||||
register cmph_uint32 sel_table_size = ((sel->n >> NBITS_STEP_SELECT_TABLE) + 1) * sizeof(cmph_uint32); // (sel->n >> NBITS_STEP_SELECT_TABLE) = (sel->n/STEP_SELECT_TABLE)
|
||||
return 2*sizeof(cmph_uint32) + vec_size + sel_table_size;
|
||||
}
|
||||
|
||||
|
12
src/select.h
12
src/select.h
@ -1,5 +1,5 @@
|
||||
#ifndef SELECT_h
|
||||
#define SELECT_h
|
||||
#ifndef __CMPH_SELECT_H__
|
||||
#define __CMPH_SELECT_H__
|
||||
|
||||
#include "cmph_types.h"
|
||||
|
||||
@ -12,17 +12,17 @@ struct _select_t
|
||||
|
||||
typedef struct _select_t select_t;
|
||||
|
||||
void select_init(select_t * sel, cmph_uint32 n, cmph_uint32 m);
|
||||
void select_init(select_t * sel);
|
||||
|
||||
void select_destroy(select_t * sel);
|
||||
|
||||
void select_generate(select_t * sel, cmph_uint32 * keys_vec);
|
||||
void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m);
|
||||
|
||||
cmph_int32 select_query(select_t * sel, cmph_uint32 one_idx);
|
||||
|
||||
cmph_int32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx);
|
||||
|
||||
double select_get_space_usage(select_t * sel);
|
||||
cmph_uint32 select_get_space_usage(select_t * sel);
|
||||
|
||||
void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen);
|
||||
|
||||
@ -30,7 +30,7 @@ void select_load(select_t * sel, const char *buf, cmph_uint32 buflen);
|
||||
|
||||
|
||||
/** \fn void select_pack(select_t *sel, void *sel_packed);
|
||||
* \brief Support the ability to pack a select structure function into a preallocated contiguous memory space pointed by sel_packed.
|
||||
* \brief Support the ability to pack a select structure into a preallocated contiguous memory space pointed by sel_packed.
|
||||
* \param sel points to the select structure
|
||||
* \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size
|
||||
*/
|
||||
|
@ -1,4 +1,4 @@
|
||||
noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests select_tests
|
||||
noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests select_tests compressed_seq_tests
|
||||
|
||||
INCLUDES = -I../src/
|
||||
|
||||
@ -13,3 +13,6 @@ mphf_tests_LDADD = ../src/libcmph.la
|
||||
|
||||
select_tests_SOURCES = select_tests.c
|
||||
select_tests_LDADD = ../src/libcmph.la
|
||||
|
||||
compressed_seq_tests_SOURCES = compressed_seq_tests.c
|
||||
compressed_seq_tests_LDADD = ../src/libcmph.la
|
||||
|
@ -68,9 +68,9 @@ int main(int argc, char **argv)
|
||||
char * select_packed = NULL;
|
||||
cmph_uint32 select_pack_size = 0;
|
||||
|
||||
select_init(&sel, n, m);
|
||||
select_generate(&sel, keys_vec);
|
||||
fprintf(stderr, "Space usage = %f\n", select_get_space_usage(&sel));
|
||||
select_init(&sel);
|
||||
select_generate(&sel, keys_vec, n, m);
|
||||
fprintf(stderr, "Space usage = %u\n", select_get_space_usage(&sel));
|
||||
print_values(&sel);
|
||||
|
||||
fprintf(stderr, "Dumping select structure\n");
|
||||
@ -87,6 +87,7 @@ int main(int argc, char **argv)
|
||||
|
||||
select_packed = (char *) calloc(select_pack_size, sizeof(char));
|
||||
select_pack(&sel, select_packed);
|
||||
select_destroy(&sel);
|
||||
|
||||
fprintf(stderr, "Querying the packed select structure\n");
|
||||
print_values_packed(select_packed);
|
||||
|
Loading…
Reference in New Issue
Block a user