*** empty log message ***
This commit is contained in:
parent
a80f0de19f
commit
f682fe0304
@ -8,7 +8,8 @@ libcmph_la_SOURCES = hash.c jenkins_hash.c\
|
||||
buffer_manager.c buffer_entry.c\
|
||||
brz.c fch.c fch_buckets.c \
|
||||
select.c compressed_seq.c \
|
||||
chd_ph.c miller_rabin.c
|
||||
chd.c chd_ph.c miller_rabin.c \
|
||||
compressed_rank.c
|
||||
|
||||
libcmph_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
|
@ -60,8 +60,8 @@ libcmph_la_LIBADD =
|
||||
am_libcmph_la_OBJECTS = hash.lo jenkins_hash.lo vstack.lo vqueue.lo \
|
||||
graph.lo cmph.lo cmph_structs.lo chm.lo bmz.lo bmz8.lo bdz.lo \
|
||||
bdz_ph.lo buffer_manager.lo buffer_entry.lo brz.lo fch.lo \
|
||||
fch_buckets.lo select.lo compressed_seq.lo chd_ph.lo \
|
||||
miller_rabin.lo
|
||||
fch_buckets.lo select.lo compressed_seq.lo chd.lo chd_ph.lo \
|
||||
miller_rabin.lo compressed_rank.lo
|
||||
libcmph_la_OBJECTS = $(am_libcmph_la_OBJECTS)
|
||||
libcmph_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
|
||||
@ -209,7 +209,8 @@ libcmph_la_SOURCES = hash.c jenkins_hash.c\
|
||||
buffer_manager.c buffer_entry.c\
|
||||
brz.c fch.c fch_buckets.c \
|
||||
select.c compressed_seq.c \
|
||||
chd_ph.c miller_rabin.c
|
||||
chd.c chd_ph.c miller_rabin.c \
|
||||
compressed_rank.c
|
||||
|
||||
libcmph_la_LDFLAGS = -version-info 0:0:0
|
||||
cmph_SOURCES = main.c wingetopt.h wingetopt.c
|
||||
@ -321,10 +322,12 @@ distclean-compile:
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brz.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffer_entry.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffer_manager.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chd.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chd_ph.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chm.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmph.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmph_structs.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compressed_rank.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compressed_seq.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fch.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fch_buckets.Plo@am__quote@
|
||||
|
19
src/bdz.c
19
src/bdz.c
@ -269,6 +269,13 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
||||
bdz_queue_t edges;
|
||||
bdz_graph3_t graph3;
|
||||
bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
|
||||
#ifdef CMPH_TIMING
|
||||
double construction_time_begin = 0.0;
|
||||
double construction_time = 0.0;
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
|
||||
#endif
|
||||
|
||||
|
||||
if (c == 0) c = 1.23; // validating restrictions over parameter c.
|
||||
DEBUGP("c: %f\n", c);
|
||||
bdz->m = mph->key_source->nkeys;
|
||||
@ -338,7 +345,9 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
|
||||
}
|
||||
ranking(bdz);
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time);
|
||||
#endif
|
||||
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||
mphf->algo = mph->algo;
|
||||
bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
|
||||
@ -363,6 +372,14 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||
}
|
||||
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
|
||||
register cmph_uint32 keys_per_bucket = 1;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
|
||||
|
16
src/bdz_ph.c
16
src/bdz_ph.c
@ -242,6 +242,12 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
bdz_ph_queue_t edges;
|
||||
bdz_ph_graph3_t graph3;
|
||||
bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
|
||||
#ifdef CMPH_TIMING
|
||||
double construction_time_begin = 0.0;
|
||||
double construction_time = 0.0;
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
|
||||
#endif
|
||||
|
||||
|
||||
if (c == 0) c = 1.23; // validating restrictions over parameter c.
|
||||
DEBUGP("c: %f\n", c);
|
||||
@ -309,6 +315,9 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
|
||||
bdz_ph_optimization(bdz_ph);
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time);
|
||||
#endif
|
||||
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||
mphf->algo = mph->algo;
|
||||
bdz_phf = (bdz_ph_data_t *)malloc(sizeof(bdz_ph_data_t));
|
||||
@ -328,6 +337,13 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||
}
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8;
|
||||
register cmph_uint32 keys_per_bucket = 1;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m);
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
|
||||
|
@ -615,8 +615,6 @@ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
|
||||
return (g_ptr[h1] + g_ptr[h2]);
|
||||
}
|
||||
|
@ -479,11 +479,11 @@ int bmz8_dump(cmph_t *mphf, FILE *fd)
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd);
|
||||
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd);
|
||||
#ifdef DEBUG
|
||||
/* #ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
#endif*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -625,6 +625,5 @@ cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register cmph_uint8 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
|
||||
return (g_ptr[h1] + g_ptr[h2]);
|
||||
}
|
||||
|
@ -904,7 +904,6 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
|
||||
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||
mphf_bucket = g[h1] + g[h2];
|
||||
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
|
||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, g[h1], g[h2], >offset[h0], m);
|
||||
DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
|
||||
return (mphf_bucket + offset[h0]);
|
||||
}
|
||||
|
271
src/chd.c
Normal file
271
src/chd.c
Normal file
@ -0,0 +1,271 @@
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string.h>
|
||||
#include<math.h>
|
||||
#include<time.h>
|
||||
#include<assert.h>
|
||||
#include<limits.h>
|
||||
|
||||
#include "cmph_structs.h"
|
||||
#include "chd_structs.h"
|
||||
#include "chd.h"
|
||||
|
||||
//#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
/*
 * Creates the CHD-specific configuration: a zero-filled chd_config_data_t
 * holding a nested configuration (built over the same key source) whose
 * algorithm is set to CMPH_CHD_PH.
 */
chd_config_data_t *chd_config_new(cmph_config_t *mph)
{
	chd_config_data_t *config = (chd_config_data_t *)calloc((size_t)1, sizeof(chd_config_data_t));
	assert(config);

	config->chd_ph = cmph_config_new(mph->key_source);
	cmph_config_set_algo(config->chd_ph, CMPH_CHD_PH);

	return config;
}
|
||||
|
||||
/*
 * Releases the CHD-specific configuration stored in mph->data, destroying
 * the nested CHD_PH configuration first when one is attached.
 */
void chd_config_destroy(cmph_config_t *mph)
{
	chd_config_data_t *config = (chd_config_data_t *) mph->data;

	DEBUGP("Destroying algorithm dependent data\n");
	if (config->chd_ph != NULL)
	{
		cmph_config_destroy(config->chd_ph);
		config->chd_ph = NULL;
	}
	free(config);
}
|
||||
|
||||
|
||||
/* Forwards the hash function selection to the nested CHD_PH configuration. */
void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	cmph_config_t *inner = ((chd_config_data_t *) mph->data)->chd_ph;

	cmph_config_set_hashfuncs(inner, hashfuncs);
}
|
||||
|
||||
|
||||
/* Forwards the keys-per-bucket setting to the nested CHD_PH configuration. */
void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
{
	cmph_config_t *inner = ((chd_config_data_t *) mph->data)->chd_ph;

	cmph_config_set_b(inner, keys_per_bucket);
}
|
||||
|
||||
|
||||
/* Forwards the keys-per-bin setting to the nested CHD_PH configuration. */
void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{
	cmph_config_t *inner = ((chd_config_data_t *) mph->data)->chd_ph;

	cmph_config_set_keys_per_bin(inner, keys_per_bin);
}
|
||||
|
||||
|
||||
/*
 * Builds a CHD function in two stages:
 *   1. generate a CHD_PH function through the nested configuration and
 *      keep only its packed representation;
 *   2. collect the indexes of the bins whose occupancy counter is zero and
 *      build a compressed rank structure over them, also kept packed.
 *
 * mph : configuration whose data field is a chd_config_data_t.
 * c   : load factor, forwarded to the nested configuration.
 *
 * Returns a newly allocated cmph_t owning both packed buffers, or NULL when
 * the CHD_PH generation fails or any allocation fails. On failure every
 * buffer allocated here is released.
 */
cmph_t *chd_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_data_t *chdf = NULL;
	chd_config_data_t *chd = (chd_config_data_t *)mph->data;
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
	compressed_rank_t cr;

	cmph_t *chd_phf = NULL;
	cmph_uint32 packed_chd_phf_size = 0;
	cmph_uint8 *packed_chd_phf = NULL;

	cmph_uint32 packed_cr_size = 0;
	cmph_uint8 *packed_cr = NULL;

	cmph_uint32 i, idx, nkeys, nvals, nbins;
	cmph_uint32 *vals_table = NULL;
	cmph_uint8 *occup_table = NULL;
#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

	cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
	cmph_config_set_graphsize(chd->chd_ph, c);

	if (mph->verbosity)
	{
		fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
	}

	chd_phf = cmph_new(chd->chd_ph);
	if (chd_phf == NULL)
	{
		return NULL;
	}

	packed_chd_phf_size = cmph_packed_size(chd_phf);
	DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);

	/* Make sure that we have enough space to pack the mphf. */
	packed_chd_phf = (cmph_uint8 *)calloc((size_t)packed_chd_phf_size, (size_t)1);
	if (packed_chd_phf == NULL && packed_chd_phf_size > 0)
	{
		cmph_destroy(chd_phf);
		return NULL;
	}

	/* Pack the mphf; only the packed form is needed from here on. */
	cmph_pack(chd_phf, packed_chd_phf);
	cmph_destroy(chd_phf);
	chd_phf = NULL;

	if (mph->verbosity)
	{
		fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
	}

	nbins = chd_ph->n;
	nkeys = chd_ph->m;
	nvals = nbins - nkeys;
	occup_table = chd_ph->occup_table;

	/* A zero-sized calloc may legitimately return NULL, so NULL is only an
	 * error when entries were actually requested. */
	vals_table = (cmph_uint32 *)calloc((size_t)nvals, sizeof(cmph_uint32));
	if (vals_table == NULL && nvals > 0)
	{
		goto fail;
	}

	compressed_rank_init(&cr);

	/* Record the index of every empty bin. The idx bound keeps the writes
	 * inside the nvals entries allocated above. */
	for (i = 0, idx = 0; i < nbins && idx < nvals; i++)
	{
		if (occup_table[i] == 0)
		{
			vals_table[idx++] = i;
		}
	}

	compressed_rank_generate(&cr, vals_table, nvals);
	free(vals_table);
	vals_table = NULL;

	packed_cr_size = compressed_rank_packed_size(&cr);
	packed_cr = (cmph_uint8 *)calloc((size_t)packed_cr_size, sizeof(cmph_uint8));
	if (packed_cr == NULL && packed_cr_size > 0)
	{
		compressed_rank_destroy(&cr);
		goto fail;
	}
	compressed_rank_pack(&cr, packed_cr);
	compressed_rank_destroy(&cr);

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
	if (mphf == NULL || chdf == NULL)
	{
		goto fail;
	}

	mphf->algo = mph->algo;

	chdf->packed_cr = packed_cr;
	packed_cr = NULL; /* transfer memory ownership */

	chdf->packed_chd_phf = packed_chd_phf;
	packed_chd_phf = NULL; /* transfer memory ownership */

	chdf->packed_chd_phf_size = packed_chd_phf_size;
	chdf->packed_cr_size = packed_cr_size;

	mphf->data = chdf;
	mphf->size = nkeys;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
#ifdef CMPH_TIMING
	{
		cmph_uint32 space_usage;
		ELAPSED_TIME_IN_SECONDS(&construction_time);
		space_usage = chd_packed_size(mphf)*8;
		construction_time = construction_time - construction_time_begin;
		fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
	}
#endif

	return mphf;

fail:
	/* free(NULL) is a no-op, so everything can be released unconditionally. */
	free(chdf);
	free(mphf);
	free(packed_cr);
	free(vals_table);
	free(packed_chd_phf);
	return NULL;
}
|
||||
|
||||
/*
 * Reads the CHD-specific part of a dumped function from fd and attaches it
 * to mphf->data. The stream layout is the one written by chd_dump:
 *   packed_chd_phf_size, packed_chd_phf bytes, packed_cr_size, packed_cr bytes.
 *
 * The function returns void, so errors cannot be reported directly.
 * If the chd_data_t itself cannot be allocated, mphf->data is set to NULL.
 * On a short read or a buffer allocation failure, mphf->data is left
 * pointing at an empty (all-zero) chd_data_t. Sizes are therefore never
 * used uninitialised and chd_destroy stays safe to call.
 */
void chd_load(FILE *fd, cmph_t *mphf)
{
	/* calloc so that every field starts out as 0 / NULL. */
	chd_data_t *chd = (chd_data_t *)calloc((size_t)1, sizeof(chd_data_t));

	DEBUGP("Loading chd mphf\n");
	mphf->data = chd;
	if (chd == NULL)
	{
		return;
	}

	if (fread(&chd->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd) != 1)
	{
		goto fail;
	}
	DEBUGP("Loading CHD_PH perfect hash function with %u bytes from disk\n", chd->packed_chd_phf_size);
	chd->packed_chd_phf = (cmph_uint8 *) calloc((size_t)chd->packed_chd_phf_size, (size_t)1);
	/* fread with a zero element size returns 0, so only read when there is data. */
	if (chd->packed_chd_phf_size > 0 &&
	    (chd->packed_chd_phf == NULL ||
	     fread(chd->packed_chd_phf, (size_t)chd->packed_chd_phf_size, (size_t)1, fd) != 1))
	{
		goto fail;
	}

	if (fread(&chd->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd) != 1)
	{
		goto fail;
	}
	DEBUGP("Loading Compressed rank structure, which has %u bytes\n", chd->packed_cr_size);
	chd->packed_cr = (cmph_uint8 *) calloc((size_t)chd->packed_cr_size, (size_t)1);
	if (chd->packed_cr_size > 0 &&
	    (chd->packed_cr == NULL ||
	     fread(chd->packed_cr, (size_t)chd->packed_cr_size, (size_t)1, fd) != 1))
	{
		goto fail;
	}
	return;

fail:
	/* Drop whatever was read so far and leave an empty, consistent record. */
	free(chd->packed_chd_phf);
	free(chd->packed_cr);
	memset(chd, 0, sizeof(chd_data_t));
}
|
||||
|
||||
/*
 * Writes the function to fd: first the common header via __cmph_dump, then
 *   packed_chd_phf_size, packed_chd_phf bytes, packed_cr_size, packed_cr bytes.
 *
 * Returns 1 when every write succeeds and 0 as soon as one of the writes
 * issued here fails.
 */
int chd_dump(cmph_t *mphf, FILE *fd)
{
	chd_data_t *data = (chd_data_t *)mphf->data;

	__cmph_dump(mphf, fd);

	/* Dumping CHD_PH perfect hash function */
	DEBUGP("Dumping CHD_PH perfect hash function with %u bytes to disk\n", data->packed_chd_phf_size);
	if (fwrite(&data->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd) != 1)
	{
		return 0;
	}
	/* fwrite with a zero element size returns 0, so skip empty payloads. */
	if (data->packed_chd_phf_size > 0 &&
	    fwrite(data->packed_chd_phf, (size_t)data->packed_chd_phf_size, (size_t)1, fd) != 1)
	{
		return 0;
	}

	/* Dumping compressed rank structure */
	DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", data->packed_cr_size);
	if (fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd) != 1)
	{
		return 0;
	}
	if (data->packed_cr_size > 0 &&
	    fwrite(data->packed_cr, (size_t)data->packed_cr_size, (size_t)1, fd) != 1)
	{
		return 0;
	}

	return 1;
}
|
||||
|
||||
/*
 * Frees a CHD function: both packed buffers, the chd_data_t that holds
 * them, and finally the cmph_t itself.
 */
void chd_destroy(cmph_t *mphf)
{
	chd_data_t *chdf = (chd_data_t *)mphf->data;

	free(chdf->packed_chd_phf);
	free(chdf->packed_cr);
	free(chdf);
	free(mphf);
}
|
||||
|
||||
/*
 * Shared lookup: evaluates the packed CHD_PH function on the key to get a
 * bin index, then subtracts the rank reported for that index by the packed
 * compressed rank structure.
 */
static inline cmph_uint32 _chd_search(void * packed_chd_phf, void * packed_cr, const char *key, cmph_uint32 keylen)
{
	cmph_uint32 bin = cmph_search_packed(packed_chd_phf, key, keylen);
	cmph_uint32 below = compressed_rank_query_packed(packed_cr, bin);

	return bin - below;
}
|
||||
|
||||
/* Evaluates the function for key using the buffers stored in mphf->data. */
cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	chd_data_t *data = mphf->data;
	void *phf = data->packed_chd_phf;
	void *cr = data->packed_cr;

	return _chd_search(phf, cr, key, keylen);
}
|
||||
|
||||
/*
 * Serialises the function into the caller-provided buffer packed_mphf as
 *   [packed_cr_size][packed_cr bytes][packed_chd_phf_size][packed_chd_phf bytes]
 *
 * Both size fields are copied with memcpy rather than stored through a
 * cmph_uint32 pointer: the second field sits right after packed_cr, so its
 * address is not 4-byte aligned unless packed_cr_size is a multiple of 4.
 * The bytes written are the same as a direct store would produce.
 */
void chd_pack(cmph_t *mphf, void *packed_mphf)
{
	chd_data_t *data = (chd_data_t *)mphf->data;
	cmph_uint8 *out = (cmph_uint8 *)packed_mphf;

	/* packing packed_cr_size and packed_cr */
	memcpy(out, &data->packed_cr_size, sizeof(cmph_uint32));
	out += sizeof(cmph_uint32);
	memcpy(out, data->packed_cr, data->packed_cr_size);
	out += data->packed_cr_size;

	/* packing packed_chd_phf_size and packed_chd_phf */
	memcpy(out, &data->packed_chd_phf_size, sizeof(cmph_uint32));
	out += sizeof(cmph_uint32);
	memcpy(out, data->packed_chd_phf, data->packed_chd_phf_size);
}
|
||||
|
||||
/*
 * Computes the packed size: sizeof(CMPH_ALGO), two cmph_uint32 size fields,
 * and the two packed buffers.
 */
cmph_uint32 chd_packed_size(cmph_t *mphf)
{
	chd_data_t *data = (chd_data_t *)mphf->data;
	size_t total = sizeof(CMPH_ALGO) + 2*sizeof(cmph_uint32);

	total += data->packed_cr_size;
	total += data->packed_chd_phf_size;
	return (cmph_uint32)total;
}
|
||||
|
||||
/*
 * Evaluates the function directly on a packed buffer. The first word is
 * packed_cr_size; the compressed rank bytes follow it, and the packed
 * CHD_PH function starts one further cmph_uint32 after those bytes.
 */
cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	cmph_uint32 *header = packed_mphf;
	cmph_uint32 cr_size = header[0];
	cmph_uint32 *cr = header + 1;
	cmph_uint8 *phf = ((cmph_uint8 *) cr) + cr_size + sizeof(cmph_uint32);

	return _chd_search(phf, cr, key, keylen);
}
|
||||
|
||||
|
59
src/chd.h
Normal file
59
src/chd.h
Normal file
@ -0,0 +1,59 @@
|
||||
#ifndef _CMPH_CHD_H__
|
||||
#define _CMPH_CHD_H__
|
||||
|
||||
#include "cmph.h"
|
||||
|
||||
typedef struct __chd_data_t chd_data_t;
|
||||
typedef struct __chd_config_data_t chd_config_data_t;
|
||||
|
||||
/* Config API */
|
||||
chd_config_data_t *chd_config_new(cmph_config_t * mph);
|
||||
void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||
|
||||
/** \fn void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
|
||||
* \brief Allows to set the number of keys per bin.
|
||||
* \param mph pointer to the configuration structure
|
||||
* \param keys_per_bin value for the number of keys per bin
|
||||
*/
|
||||
void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
|
||||
|
||||
/** \fn void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
|
||||
* \brief Allows to set the number of keys per bucket.
|
||||
* \param mph pointer to the configuration structure
|
||||
* \param keys_per_bucket value for the number of keys per bucket
|
||||
*/
|
||||
void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
|
||||
void chd_config_destroy(cmph_config_t *mph);
|
||||
|
||||
|
||||
/* Chd algorithm API */
|
||||
cmph_t *chd_new(cmph_config_t *mph, double c);
|
||||
void chd_load(FILE *fd, cmph_t *mphf);
|
||||
int chd_dump(cmph_t *mphf, FILE *fd);
|
||||
void chd_destroy(cmph_t *mphf);
|
||||
cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||
|
||||
/** \fn void chd_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void chd_pack(cmph_t *mphf, void *packed_mphf);
|
||||
|
||||
/** \fn cmph_uint32 chd_packed_size(cmph_t *mphf);
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
cmph_uint32 chd_packed_size(cmph_t *mphf);
|
||||
|
||||
/** \fn cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key length in bytes
|
||||
* \return The mphf value
|
||||
*/
|
||||
cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
||||
#endif
|
114
src/chd_ph.c
114
src/chd_ph.c
@ -11,7 +11,7 @@
|
||||
#include "chd_ph.h"
|
||||
#include"miller_rabin.h"
|
||||
|
||||
#define DEBUG
|
||||
//#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
// NO_ELEMENT is equivalent to null pointer
|
||||
@ -99,8 +99,7 @@ static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * bu
|
||||
static inline cmph_uint32 * chd_ph_ordering(chd_ph_bucket_t * buckets, cmph_uint32 nbuckets, cmph_uint32 max_bucket_size);
|
||||
|
||||
static inline cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint32 max_bucket_size,
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table,
|
||||
cmph_uint8 * occup_table);
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table);
|
||||
|
||||
static inline double chd_ph_space_lower_bound(cmph_uint32 _n, cmph_uint32 _r)
|
||||
{
|
||||
@ -146,10 +145,8 @@ chd_ph_config_data_t *chd_ph_config_new()
|
||||
chd_ph->use_h = 1;
|
||||
chd_ph->keys_per_bin = 1;
|
||||
chd_ph->keys_per_bucket = 4;
|
||||
chd_ph->occup_table = 0;
|
||||
|
||||
//The following fields are used just for statistics
|
||||
chd_ph->space_usage = 0;
|
||||
chd_ph->entropy = 0.0;
|
||||
return chd_ph;
|
||||
}
|
||||
|
||||
@ -157,6 +154,11 @@ void chd_ph_config_destroy(cmph_config_t *mph)
|
||||
{
|
||||
chd_ph_config_data_t *data = (chd_ph_config_data_t *) mph->data;
|
||||
DEBUGP("Destroying algorithm dependent data\n");
|
||||
if(data->occup_table)
|
||||
{
|
||||
free(data->occup_table);
|
||||
data->occup_table = NULL;
|
||||
}
|
||||
free(data);
|
||||
}
|
||||
|
||||
@ -286,8 +288,8 @@ cmph_uint32 * chd_ph_ordering(chd_ph_bucket_t * buckets, cmph_uint32 nbuckets, c
|
||||
return sorted_lists;
|
||||
}
|
||||
|
||||
static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint8 * occup_table,
|
||||
cmph_uint32 probe0_num, cmph_uint32 probe1_num, cmph_uint32 bucket_num)
|
||||
static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint32 probe0_num,
|
||||
cmph_uint32 probe1_num, cmph_uint32 bucket_num)
|
||||
{
|
||||
register cmph_uint32 i;
|
||||
register cmph_uint32 size = buckets[bucket_num].size;
|
||||
@ -300,11 +302,11 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph
|
||||
{
|
||||
position = (item->f + ((cmph_uint64)item->h)*probe0_num + probe1_num) % chd_ph->n;
|
||||
|
||||
if(occup_table[position] >= chd_ph->keys_per_bin)
|
||||
if(chd_ph->occup_table[position] >= chd_ph->keys_per_bin)
|
||||
{
|
||||
break;
|
||||
}
|
||||
occup_table[position]++;
|
||||
(chd_ph->occup_table[position])++;
|
||||
|
||||
item = item->next;
|
||||
};
|
||||
@ -319,7 +321,7 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph
|
||||
break;
|
||||
}
|
||||
position = (item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n;
|
||||
occup_table[position]--;
|
||||
(chd_ph->occup_table[position])--;
|
||||
item = item->next;
|
||||
i--;
|
||||
};
|
||||
@ -329,7 +331,7 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph
|
||||
};
|
||||
|
||||
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint32 max_probes,
|
||||
cmph_uint32 * disp_table, cmph_uint8 * occup_table, cmph_uint32 bucket_num)
|
||||
cmph_uint32 * disp_table, cmph_uint32 bucket_num)
|
||||
|
||||
{
|
||||
register cmph_uint32 probe0_num, probe1_num, probe_num;
|
||||
@ -339,7 +341,7 @@ static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucke
|
||||
|
||||
while(1)
|
||||
{
|
||||
if(place_bucket_probe(chd_ph, buckets, occup_table, probe0_num, probe1_num, bucket_num))
|
||||
if(place_bucket_probe(chd_ph, buckets, probe0_num, probe1_num, bucket_num))
|
||||
{
|
||||
disp_table[bucket_num] = probe0_num + probe1_num * chd_ph->n;
|
||||
return 1;
|
||||
@ -360,8 +362,7 @@ static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucke
|
||||
};
|
||||
|
||||
static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint32 max_bucket_size,
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table,
|
||||
cmph_uint8 * occup_table)
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table)
|
||||
{
|
||||
register cmph_uint32 i = 0;
|
||||
register cmph_uint32 curr_bucket = 0;
|
||||
@ -371,7 +372,7 @@ static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
curr_bucket = sorted_lists[i];
|
||||
while(curr_bucket != NO_ELEMENT)
|
||||
{
|
||||
if(!place_bucket(chd_ph, buckets, max_probes, disp_table, occup_table, curr_bucket))
|
||||
if(!place_bucket(chd_ph, buckets, max_probes, disp_table, curr_bucket))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -383,8 +384,7 @@ static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
};
|
||||
|
||||
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint32 max_bucket_size,
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table,
|
||||
cmph_uint8 * occup_table)
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table)
|
||||
{
|
||||
register cmph_uint32 i;
|
||||
register cmph_uint32 curr_bucket, prev_bucket;
|
||||
@ -402,7 +402,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
while(curr_bucket != NO_ELEMENT)
|
||||
{
|
||||
// if bucket is successfully placed remove it from list
|
||||
if(place_bucket_probe(chd_ph, buckets, occup_table, probe0_num, probe1_num, curr_bucket))
|
||||
if(place_bucket_probe(chd_ph, buckets, probe0_num, probe1_num, curr_bucket))
|
||||
{
|
||||
disp_table[curr_bucket] = probe0_num + probe1_num * chd_ph->n;
|
||||
// DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]);
|
||||
@ -440,29 +440,28 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
};
|
||||
|
||||
cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, cmph_uint32 max_bucket_size,
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table,
|
||||
cmph_uint8 * occup_table)
|
||||
cmph_uint32 *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table)
|
||||
{
|
||||
if(chd_ph->use_h)
|
||||
{
|
||||
return place_buckets2(chd_ph, buckets, max_bucket_size, sorted_lists, max_probes, disp_table, occup_table);
|
||||
return place_buckets2(chd_ph, buckets, max_bucket_size, sorted_lists, max_probes, disp_table);
|
||||
}
|
||||
else
|
||||
{
|
||||
return place_buckets1(chd_ph, buckets, max_bucket_size, sorted_lists, max_probes, disp_table, occup_table);
|
||||
return place_buckets1(chd_ph, buckets, max_bucket_size, sorted_lists, max_probes, disp_table);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets,
|
||||
cmph_uint32 * disp_table, cmph_uint8 * occup_table)
|
||||
cmph_uint32 * disp_table)
|
||||
{
|
||||
register cmph_uint32 i, j;
|
||||
register cmph_uint32 position, probe0_num, probe1_num;
|
||||
register cmph_uint32 m = 0;
|
||||
register chd_ph_item_t * item;
|
||||
|
||||
memset(occup_table, 0, chd_ph->n);
|
||||
memset(chd_ph->occup_table, 0, chd_ph->n);
|
||||
for(i = 0; i < chd_ph->nbuckets; i++)
|
||||
{
|
||||
j = buckets[i].size;
|
||||
@ -477,11 +476,11 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
|
||||
}
|
||||
m++;
|
||||
position = (item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n;
|
||||
if(occup_table[position] >= chd_ph->keys_per_bin)
|
||||
if(chd_ph->occup_table[position] >= chd_ph->keys_per_bin)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
occup_table[position]++;
|
||||
(chd_ph->occup_table[position])++;
|
||||
item = item->next;
|
||||
};
|
||||
};
|
||||
@ -498,7 +497,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
|
||||
register double load_factor = c;
|
||||
register cmph_uint8 searching_success = 0;
|
||||
register cmph_uint32 max_probes = 1 << 18; // default value for max_probes
|
||||
register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
|
||||
register cmph_uint32 iterations = 100;
|
||||
chd_ph_bucket_t * buckets = NULL;
|
||||
chd_ph_item_t * items = NULL;
|
||||
@ -506,8 +505,14 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
cmph_uint32 max_bucket_size = 0;
|
||||
cmph_uint32 * sorted_lists = NULL;
|
||||
cmph_uint32 * disp_table = NULL;
|
||||
cmph_uint8 * occup_table;
|
||||
|
||||
register double space_lower_bound = 0;
|
||||
#ifdef CMPH_TIMING
|
||||
double construction_time_begin = 0.0;
|
||||
double construction_time = 0.0;
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
|
||||
#endif
|
||||
|
||||
|
||||
chd_ph->m = mph->key_source->nkeys;
|
||||
DEBUGP("m = %u\n", chd_ph->m);
|
||||
|
||||
@ -539,10 +544,14 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
};
|
||||
|
||||
DEBUGP("n = %u \n", chd_ph->n);
|
||||
|
||||
if(mph->verbosity && chd_ph->keys_per_bin == 1)
|
||||
if(chd_ph->keys_per_bin == 1)
|
||||
{
|
||||
fprintf(stderr, "space lower bound is %.3f bits per key\n", chd_ph_space_lower_bound(chd_ph->m, chd_ph->n));
|
||||
space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
|
||||
}
|
||||
|
||||
if(mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
|
||||
}
|
||||
|
||||
// We allocate the working tables
|
||||
@ -550,7 +559,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
|
||||
|
||||
max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
|
||||
occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
|
||||
chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
|
||||
disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
|
||||
//
|
||||
// init_genrand(time(0));
|
||||
@ -588,12 +597,12 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Starting searching step\n");
|
||||
}
|
||||
|
||||
searching_success = chd_ph_searching(chd_ph, buckets, max_bucket_size, sorted_lists, max_probes, disp_table, occup_table);
|
||||
searching_success = chd_ph_searching(chd_ph, buckets, max_bucket_size, sorted_lists, max_probes, disp_table);
|
||||
|
||||
if(searching_success) break;
|
||||
|
||||
// reset occup_table
|
||||
memset(occup_table, 0, chd_ph->n);
|
||||
memset(chd_ph->occup_table, 0, chd_ph->n);
|
||||
if(iterations == 0)
|
||||
{
|
||||
// Cleanup memory
|
||||
@ -606,16 +615,15 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
chd_ph->entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
|
||||
DEBUGP("Entropy = %.4f\n", chd_ph->entropy/chd_ph->m);
|
||||
|
||||
if(!chd_ph_check_bin_hashing(chd_ph, buckets, disp_table, occup_table))
|
||||
#ifdef DEBUG
|
||||
{
|
||||
|
||||
DEBUGP("Error for bin packing generation");
|
||||
return NULL;
|
||||
};
|
||||
if(!chd_ph_check_bin_hashing(chd_ph, buckets, disp_table))
|
||||
{
|
||||
|
||||
DEBUGP("Error for bin packing generation");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mph->verbosity)
|
||||
@ -630,16 +638,18 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
|
||||
compressed_seq_init(chd_ph->cs);
|
||||
compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
|
||||
chd_ph->space_usage = compressed_seq_get_space_usage(chd_ph->cs);
|
||||
chd_ph->space_usage += 64;
|
||||
DEBUGP("space_usage/key = %.4f\n", chd_ph->space_usage/(double)chd_ph->m);
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time);
|
||||
register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
|
||||
DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
|
||||
#endif
|
||||
|
||||
cleanup:
|
||||
chd_ph_bucket_destroy(buckets);
|
||||
free(items);
|
||||
free(sorted_lists);
|
||||
free(disp_table);
|
||||
free(occup_table);
|
||||
if(failure)
|
||||
{
|
||||
if(chd_ph->hl)
|
||||
@ -669,6 +679,12 @@ cleanup:
|
||||
{
|
||||
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||
}
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
|
21
src/chd_structs.h
Normal file
21
src/chd_structs.h
Normal file
@ -0,0 +1,21 @@
|
||||
#ifndef __CMPH_CHD_STRUCTS_H__
|
||||
#define __CMPH_CHD_STRUCTS_H__
|
||||
|
||||
#include "chd_structs_ph.h"
|
||||
#include "chd_ph.h"
|
||||
#include "compressed_rank.h"
|
||||
|
||||
struct __chd_data_t
|
||||
{
|
||||
cmph_uint32 packed_cr_size;
|
||||
cmph_uint8 * packed_cr; // packed compressed rank structure to control the number of zeros in a bit vector
|
||||
|
||||
cmph_uint32 packed_chd_phf_size;
|
||||
cmph_uint8 * packed_chd_phf;
|
||||
};
|
||||
|
||||
struct __chd_config_data_t
|
||||
{
|
||||
cmph_config_t *chd_ph; // chd_ph algorithm must be used here
|
||||
};
|
||||
#endif
|
@ -24,9 +24,6 @@ struct __chd_ph_config_data_t
|
||||
cmph_uint8 use_h; // flag to indicate the of use of a heuristic (use_h = 1)
|
||||
cmph_uint32 keys_per_bin;//maximum number of keys per bin
|
||||
cmph_uint32 keys_per_bucket; // average number of keys per bucket
|
||||
|
||||
//The following fields are used just for statistics
|
||||
cmph_uint32 space_usage;
|
||||
double entropy;
|
||||
cmph_uint8 *occup_table; // table that indicates occupied positions
|
||||
};
|
||||
#endif
|
||||
|
@ -225,11 +225,11 @@ int chm_dump(cmph_t *mphf, FILE *fd)
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint32)*data->n, (size_t)1, fd);
|
||||
#ifdef DEBUG
|
||||
/* #ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
#endif*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
46
src/cmph.c
46
src/cmph.c
@ -8,6 +8,7 @@
|
||||
#include "bdz.h" /* included -- Fabiano */
|
||||
#include "bdz_ph.h" /* included -- Fabiano */
|
||||
#include "chd_ph.h" /* included -- Fabiano */
|
||||
#include "chd.h" /* included -- Fabiano */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
@ -15,7 +16,7 @@
|
||||
//#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", NULL }; /* included -- Fabiano */
|
||||
const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", "chd", NULL }; /* included -- Fabiano */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -325,6 +326,9 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
||||
case CMPH_CHD_PH:
|
||||
chd_ph_config_destroy(mph);
|
||||
break;
|
||||
case CMPH_CHD:
|
||||
chd_config_destroy(mph);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -354,6 +358,9 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
||||
case CMPH_CHD_PH:
|
||||
mph->data = chd_ph_config_new();
|
||||
break;
|
||||
case CMPH_CHD:
|
||||
mph->data = chd_config_new(mph);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -392,6 +399,10 @@ void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
|
||||
{
|
||||
chd_ph_config_set_b(mph, b);
|
||||
}
|
||||
else if (mph->algo == CMPH_CHD)
|
||||
{
|
||||
chd_config_set_b(mph, b);
|
||||
}
|
||||
}
|
||||
|
||||
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
|
||||
@ -400,6 +411,10 @@ void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
|
||||
{
|
||||
chd_ph_config_set_keys_per_bin(mph, keys_per_bin);
|
||||
}
|
||||
else if (mph->algo == CMPH_CHD)
|
||||
{
|
||||
chd_config_set_keys_per_bin(mph, keys_per_bin);
|
||||
}
|
||||
}
|
||||
|
||||
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
|
||||
@ -441,6 +456,9 @@ void cmph_config_destroy(cmph_config_t *mph)
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
chd_ph_config_destroy(mph);
|
||||
break;
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
chd_config_destroy(mph);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -481,6 +499,9 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
chd_ph_config_set_hashfuncs(mph, hashfuncs);
|
||||
break;
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
chd_config_set_hashfuncs(mph, hashfuncs);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -534,6 +555,10 @@ cmph_t *cmph_new(cmph_config_t *mph)
|
||||
DEBUGP("Creating chd_ph hash\n");
|
||||
mphf = chd_ph_new(mph, c);
|
||||
break;
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
DEBUGP("Creating chd hash\n");
|
||||
mphf = chd_new(mph, c);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -560,6 +585,8 @@ int cmph_dump(cmph_t *mphf, FILE *f)
|
||||
return bdz_ph_dump(mphf, f);
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
return chd_ph_dump(mphf, f);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
return chd_dump(mphf, f);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -607,6 +634,10 @@ cmph_t *cmph_load(FILE *f)
|
||||
DEBUGP("Loading chd_ph algorithm dependent parts\n");
|
||||
chd_ph_load(f, mphf);
|
||||
break;
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
DEBUGP("Loading chd algorithm dependent parts\n");
|
||||
chd_load(f, mphf);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -643,6 +674,9 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
DEBUGP("chd_ph algorithm search\n");
|
||||
return chd_ph_search(mphf, key, keylen);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
DEBUGP("chd algorithm search\n");
|
||||
return chd_search(mphf, key, keylen);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -683,6 +717,9 @@ void cmph_destroy(cmph_t *mphf)
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
chd_ph_destroy(mphf);
|
||||
return;
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
chd_destroy(mphf);
|
||||
return;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -728,6 +765,9 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
chd_ph_pack(mphf, ptr);
|
||||
break;
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
chd_pack(mphf, ptr);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -759,6 +799,8 @@ cmph_uint32 cmph_packed_size(cmph_t *mphf)
|
||||
return bdz_ph_packed_size(mphf);
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
return chd_ph_packed_size(mphf);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
return chd_packed_size(mphf);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -794,6 +836,8 @@ cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 k
|
||||
return bdz_ph_search_packed(++ptr, key, keylen);
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
return chd_ph_search_packed(++ptr, key, keylen);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
return chd_search_packed(++ptr, key, keylen);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
@ -101,6 +101,10 @@ cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||
*/
|
||||
cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
||||
// TIMING functions. To use the macro CMPH_TIMING must be defined
|
||||
#include "cmph_time.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
62
src/cmph_time.h
Normal file
62
src/cmph_time.h
Normal file
@ -0,0 +1,62 @@
|
||||
#ifdef ELAPSED_TIME_IN_SECONDS
|
||||
#undef ELAPSED_TIME_IN_SECONDS
|
||||
#endif
|
||||
|
||||
#ifdef ELAPSED_TIME_IN_uSECONDS
|
||||
#undef ELAPSED_TIME_IN_uSECONDS
|
||||
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
// include headers to use gettimeofday
|
||||
#else
|
||||
#ifdef __GNUC__
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#ifndef __CMPH_TIME_H__
|
||||
#define __CMPH_TIME_H__
|
||||
/**
 * Stores the current gettimeofday() reading, in seconds with a fractional
 * microsecond part, into *elapsed_time.
 * When gettimeofday() fails, *elapsed_time is left unmodified.
 */
static inline void elapsed_time_in_seconds(double * elapsed_time)
{
	struct timeval now;

	if (gettimeofday(&now, NULL) >= 0)
	{
		*elapsed_time = (double)now.tv_sec + ((double)now.tv_usec/1000000.0);
	}
}
|
||||
// No-op placeholder that the ELAPSED_TIME_IN_SECONDS macro can be mapped to; it measures nothing.
static inline void dummy_elapsed_time_in_seconds() {}
|
||||
/**
 * Stores the current gettimeofday() reading, in microseconds, into *elapsed_time.
 * When gettimeofday() fails, *elapsed_time is left unmodified.
 */
static inline void elapsed_time_in_useconds(cmph_uint64 * elapsed_time)
{
	struct timeval e_time;

	if (gettimeofday(&e_time, NULL) < 0)
	{
		return;
	}
	// Widen before multiplying: tv_sec is a time_t, and seconds * 1000000
	// overflows on platforms where time_t is only 32 bits wide.
	*elapsed_time = (cmph_uint64)e_time.tv_sec*1000000 + (cmph_uint64)e_time.tv_usec;
}
|
||||
// No-op placeholder that the ELAPSED_TIME_IN_uSECONDS macro can be mapped to; it measures nothing.
static inline void dummy_elapsed_time_in_useconds() {}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
#ifdef __GNUC__
|
||||
#define ELAPSED_TIME_IN_SECONDS elapsed_time_in_seconds
|
||||
#define ELAPSED_TIME_IN_uSECONDS elapsed_time_in_useconds
|
||||
#else
|
||||
#define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
|
||||
#define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
|
||||
#endif
|
||||
#else
|
||||
#ifdef __GNUC__
|
||||
#define ELAPSED_TIME_IN_SECONDS
|
||||
#define ELAPSED_TIME_IN_uSECONDS
|
||||
#else
|
||||
#define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
|
||||
#define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
|
||||
#endif
|
||||
#endif
|
@ -35,7 +35,7 @@ typedef unsigned int cmph_uint32;
|
||||
typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH;
|
||||
extern const char *cmph_hash_names[];
|
||||
typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH,
|
||||
CMPH_BDZ, CMPH_BDZ_PH, CMPH_CHD_PH, CMPH_COUNT } CMPH_ALGO; /* included -- Fabiano */
|
||||
CMPH_BDZ, CMPH_BDZ_PH, CMPH_CHD_PH, CMPH_CHD, CMPH_COUNT } CMPH_ALGO; /* included -- Fabiano */
|
||||
extern const char *cmph_names[];
|
||||
|
||||
#endif
|
||||
|
321
src/compressed_rank.c
Normal file
321
src/compressed_rank.c
Normal file
@ -0,0 +1,321 @@
|
||||
#include<stdlib.h>
|
||||
#include<stdio.h>
|
||||
#include<limits.h>
|
||||
#include<string.h>
|
||||
#include"compressed_rank.h"
|
||||
#include"bitbool.h"
|
||||
// #define DEBUG
|
||||
#include"debug.h"
|
||||
/**
 * Integer base-2 logarithm.
 * \param x input value
 * \return floor(log2(x)) for x >= 1, and 0 for x == 0
 */
static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
{
	cmph_uint32 res = 0;

	// Every halving that still leaves a value above 1 accounts for one more bit.
	while (x > 1)
	{
		x >>= 1;
		res++;
	}
	return res;
}
|
||||
|
||||
/**
 * Puts a compressed_rank_t into its empty state: the embedded select structure
 * is initialized, the scalar fields are zeroed and no remainder table is attached.
 */
void compressed_rank_init(compressed_rank_t * cr)
{
	select_init(&cr->sel);
	cr->vals_rems = 0;
	cr->rem_r = 0;
	cr->n = 0;
	cr->max_val = 0;
}
|
||||
|
||||
/**
 * Releases what a compressed_rank_t owns: the embedded select structure and
 * the remainder table. The table pointer is cleared afterwards.
 */
void compressed_rank_destroy(compressed_rank_t * cr)
{
	select_destroy(&cr->sel);
	free(cr->vals_rems);
	cr->vals_rems = 0;
}
|
||||
|
||||
/**
 * Builds the compressed rank structure for the n values in vals_table.
 *
 * Each value is split in two: its low rem_r bits go to cr->vals_rems (one
 * rem_r-bit cell per value), and its high part (value >> rem_r) is encoded
 * through the select structure cr->sel.
 *
 * \param cr structure to fill; it must have been set up with compressed_rank_init,
 *        because a previously attached remainder table is released here
 * \param vals_table the values; assumed to be sorted in non-decreasing order
 *        (the maximum is read from the last entry) -- TODO confirm with callers
 * \param n number of entries in vals_table
 *
 * When n is 0 or vals_table is NULL nothing is generated and cr is left untouched.
 * When memory cannot be allocated, cr->vals_rems is left NULL and the select
 * structure is not generated; such a structure must not be queried.
 */
void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
{
	cmph_uint32 i, j;
	cmph_uint32 rems_mask;
	cmph_uint32 quot_count;
	cmph_uint32 * select_vec = NULL;

	// An empty table has no last element and would divide by zero below.
	if (n == 0 || vals_table == NULL)
	{
		return;
	}

	cr->n = n;
	cr->max_val = vals_table[cr->n - 1];
	cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
	if (cr->rem_r == 0)
	{
		cr->rem_r = 1;
	}
	quot_count = cr->max_val >> cr->rem_r;
	// Unsigned constant: a signed 1 << 31 would be undefined.
	rems_mask = (1U << cr->rem_r) - 1U;

	// Drop a table left over from an earlier generate/load before replacing it.
	free(cr->vals_rems);
	cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
	select_vec = (cmph_uint32 *) calloc(quot_count, sizeof(cmph_uint32));
	// calloc(0, ...) may legitimately return NULL, so select_vec only counts as
	// a failure when at least one entry was requested.
	if (cr->vals_rems == NULL || (select_vec == NULL && quot_count > 0))
	{
		free(select_vec);
		free(cr->vals_rems);
		cr->vals_rems = NULL;
		return;
	}

	// Low rem_r bits of every value.
	for (i = 0; i < cr->n; i++)
	{
		set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
	}

	// For every quotient i in 1..quot_count, record how many values have a
	// quotient (value >> rem_r) smaller than i.
	for (i = 1, j = 0; i <= quot_count; i++)
	{
		while (i > (vals_table[j] >> cr->rem_r))
		{
			j++;
		}
		select_vec[i - 1] = j;
	}

	// FABIANO: the count used to carry a + 1, which was removed when the select
	// structure was changed to work up to m instead of up to m - 1.
	select_generate(&cr->sel, select_vec, quot_count, cr->n);

	free(select_vec);
}
|
||||
|
||||
/**
 * Computes the rank of idx in a generated compressed rank structure.
 * \param cr points to the compressed rank structure
 * \param idx value whose rank is requested
 * \return cr->n when idx is greater than the largest stored value; otherwise
 *         the rank found by scanning forward from the select position of idx's
 *         quotient. NOTE(review): this looks like the number of stored values
 *         smaller than idx -- confirm against the callers in chd.c.
 */
cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
{
	cmph_uint32 rems_mask;
	cmph_uint32 val_quot, val_rem;
	cmph_uint32 sel_res, rank;

	if (idx > cr->max_val)
	{
		return cr->n;
	}

	val_quot = idx >> cr->rem_r;
	// Unsigned constant: a signed 1 << 31 would be undefined.
	rems_mask = (1U << cr->rem_r) - 1U;
	val_rem = idx & rems_mask;

	if (val_quot == 0)
	{
		rank = sel_res = 0;
	}
	else
	{
		// Start right after the one bit that closes the previous quotient.
		sel_res = select_query(&cr->sel, val_quot - 1) + 1;
		rank = sel_res - val_quot;
	}

	// Walk the values that share idx's quotient until either the quotient ends
	// (a set bit) or a remainder that is not below idx's remainder is found.
	for (;;)
	{
		if (GETBIT32(cr->sel.bits_vec, sel_res))
		{
			break;
		}
		if (get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
		{
			break;
		}
		sel_res++;
		rank++;
	}

	return rank;
}
|
||||
|
||||
/**
 * Returns the space used by the structure: three cmph_uint32 words, the
 * remainder table and whatever select_get_space_usage reports for cr->sel.
 * The local terms are multiplied by 8, i.e. expressed in bits.
 */
cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
{
	cmph_uint32 total_bits = 3*sizeof(cmph_uint32)*8;

	total_bits += BITS_TABLE_SIZE(cr->n, cr->rem_r)*sizeof(cmph_uint32)*8;
	total_bits += select_get_space_usage(&cr->sel);
	return total_bits;
}
|
||||
|
||||
/**
 * Serializes cr into a newly allocated buffer.
 *
 * Layout, in order: max_val, n, rem_r and the length of the select image (one
 * cmph_uint32 each), the image produced by select_dump for cr->sel, then the
 * words of cr->vals_rems.
 *
 * \param cr structure to serialize
 * \param buf receives the allocated buffer, which the caller must free();
 *        it is NULL on failure
 * \param buflen receives the buffer size in bytes, or UINT_MAX on failure
 */
void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
{
	cmph_uint32 sel_size = select_packed_size(&(cr->sel));
	cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * sizeof(cmph_uint32);
	cmph_uint32 pos = 0;
	char * buf_sel = 0;
	cmph_uint32 buflen_sel = 0;

	*buflen = 4*sizeof(cmph_uint32) + sel_size + vals_rems_size;

	DEBUGP("sel_size = %u\n", sel_size);
	DEBUGP("vals_rems_size = %u\n", vals_rems_size);

	*buf = (char *)calloc(*buflen, sizeof(char));

	if (!*buf)
	{
		*buflen = UINT_MAX;
		return;
	}

	// dumping max_val, n and rem_r
	memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("max_val = %u\n", cr->max_val);

	memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cr->n);

	memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cr->rem_r);

	// dumping sel
	select_dump(&cr->sel, &buf_sel, &buflen_sel);
	// The output buffer reserved sel_size bytes for the select image: a missing
	// image, or one larger than that, cannot be copied safely.
	if (buf_sel == NULL || buflen_sel > sel_size)
	{
		free(buf_sel);
		free(*buf);
		*buf = NULL;
		*buflen = UINT_MAX;
		return;
	}
	memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	memcpy(*buf + pos, buf_sel, buflen_sel);

	#ifdef DEBUG
	cmph_uint32 i = 0;
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
	}
	#endif
	pos += buflen_sel;

	free(buf_sel);

	// dumping vals_rems
	if (vals_rems_size > 0)
	{
		memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
	}
	#ifdef DEBUG
	for(i = 0; i < vals_rems_size; i++)
	{
		DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
	}
	#endif

	DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
}
|
||||
|
||||
/**
 * Rebuilds cr from a buffer written by compressed_rank_dump.
 *
 * Reads, in order: max_val, n, rem_r and the length of the select image (one
 * cmph_uint32 each), the select image (handed to select_load), then the words
 * of the remainder table.
 *
 * \param cr structure to fill; a remainder table it already owns is released,
 *        so cr must have been initialized
 * \param buf serialized image
 * \param buflen size of buf in bytes; only used for the debug trace, no bounds
 *        checking is performed against it
 *
 * When the remainder table cannot be allocated, cr->vals_rems is left NULL.
 */
void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
{
	cmph_uint32 pos = 0;
	cmph_uint32 buflen_sel = 0;
	cmph_uint32 vals_rems_size = 0;

	(void)buflen; // referenced only by the debug trace at the end

	// loading max_val, n, and rem_r
	memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("max_val = %u\n", cr->max_val);

	memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("n = %u\n", cr->n);

	memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("rem_r = %u\n", cr->rem_r);

	// loading sel
	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
	pos += sizeof(cmph_uint32);
	DEBUGP("buflen_sel = %u\n", buflen_sel);

	select_load(&cr->sel, buf + pos, buflen_sel);
	#ifdef DEBUG
	cmph_uint32 i = 0;
	for(i = 0; i < buflen_sel; i++)
	{
		DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
	}
	#endif
	pos += buflen_sel;

	// loading vals_rems
	free(cr->vals_rems); // free(NULL) is a no-op, no guard needed
	vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
	cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
	if (cr->vals_rems == NULL)
	{
		// Allocation failed (or nothing was requested): there is nowhere to copy to.
		return;
	}
	// Word count to byte count, using the same unit compressed_rank_dump uses.
	vals_rems_size *= sizeof(cmph_uint32);
	memcpy(cr->vals_rems, buf + pos, vals_rems_size);

	#ifdef DEBUG
	for(i = 0; i < vals_rems_size; i++)
	{
		DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
	}
	#endif

	DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
}
|
||||
|
||||
|
||||
|
||||
/**
 * Packs cr into the caller-provided memory area cr_packed, which must hold at
 * least compressed_rank_packed_size(cr) bytes.
 * Does nothing when either pointer is NULL, or when the intermediate dump
 * could not be produced; in the latter case cr_packed is left untouched.
 */
void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
{
	char *buf = NULL;
	cmph_uint32 buflen = 0;

	if (!cr || !cr_packed)
	{
		return;
	}

	compressed_rank_dump(cr, &buf, &buflen);
	// On failure the dump leaves buf NULL and reports UINT_MAX as the length;
	// copying in that state would read through a null pointer.
	if (buf == NULL)
	{
		return;
	}
	memcpy(cr_packed, buf, buflen);
	free(buf);
}
|
||||
|
||||
/**
 * Returns the number of bytes needed to pack cr: four cmph_uint32 header
 * words, the packed select structure and the remainder table.
 */
cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
{
	cmph_uint32 header_bytes = 4 * sizeof(cmph_uint32);
	cmph_uint32 select_bytes = select_packed_size(&cr->sel);
	cmph_uint32 rems_bytes = BITS_TABLE_SIZE(cr->n, cr->rem_r) * sizeof(cmph_uint32);

	return header_bytes + select_bytes + rems_bytes;
}
|
||||
|
||||
/**
 * Computes the rank of idx directly on a packed compressed rank image.
 *
 * The image is read as: max_val, n, rem_r and the byte length of the packed
 * select structure (one cmph_uint32 each), the packed select structure, then
 * the remainder table.
 *
 * \param cr_packed pointer to the contiguous packed image
 * \param idx value whose rank is requested
 * \return n when idx is greater than max_val, otherwise the rank found by the
 *         same scan that compressed_rank_query performs
 */
cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
{
	// unpacking cr_packed
	cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
	cmph_uint32 max_val = *ptr++;
	cmph_uint32 n = *ptr++;
	cmph_uint32 rem_r = *ptr++;
	cmph_uint32 buflen_sel = *ptr++;
	cmph_uint32 * sel_packed = ptr;

	cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m

	// buflen_sel is in bytes; the remainder table follows the select image.
	cmph_uint32 * vals_rems = sel_packed + (buflen_sel >> 2);

	// compressed rank query computation
	cmph_uint32 rems_mask;
	cmph_uint32 val_quot, val_rem;
	cmph_uint32 sel_res, rank;

	if (idx > max_val)
	{
		return n;
	}

	val_quot = idx >> rem_r;
	// Unsigned constant: a signed 1 << 31 would be undefined.
	rems_mask = (1U << rem_r) - 1U;
	val_rem = idx & rems_mask;

	if (val_quot == 0)
	{
		rank = sel_res = 0;
	}
	else
	{
		// Start right after the one bit that closes the previous quotient.
		sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
		rank = sel_res - val_quot;
	}

	// Walk the values that share idx's quotient until either the quotient ends
	// (a set bit) or a remainder that is not below idx's remainder is found.
	for (;;)
	{
		if (GETBIT32(bits_vec, sel_res))
		{
			break;
		}
		if (get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)
		{
			break;
		}
		sel_res++;
		rank++;
	}

	return rank;
}
|
||||
|
||||
|
||||
|
55
src/compressed_rank.h
Normal file
55
src/compressed_rank.h
Normal file
@ -0,0 +1,55 @@
|
||||
#ifndef __CMPH_COMPRESSED_RANK_H__
|
||||
#define __CMPH_COMPRESSED_RANK_H__
|
||||
|
||||
#include "select.h"
|
||||
|
||||
struct _compressed_rank_t
|
||||
{
|
||||
cmph_uint32 max_val;
|
||||
cmph_uint32 n; // number of values stored in vals_rems
|
||||
// The length in bits of each value is decomposed into two compnents: the lg(n) MSBs are stored in rank_select data structure
|
||||
// the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
|
||||
cmph_uint32 rem_r;
|
||||
select_t sel;
|
||||
cmph_uint32 * vals_rems;
|
||||
};
|
||||
|
||||
typedef struct _compressed_rank_t compressed_rank_t;
|
||||
|
||||
void compressed_rank_init(compressed_rank_t * cr);
|
||||
|
||||
void compressed_rank_destroy(compressed_rank_t * cr);
|
||||
|
||||
void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||
|
||||
cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx);
|
||||
|
||||
cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr);
|
||||
|
||||
void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen);
|
||||
|
||||
void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen);
|
||||
|
||||
|
||||
/** \fn void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
|
||||
* \brief Support the ability to pack a compressed_rank structure into a preallocated contiguous memory space pointed by cr_packed.
|
||||
* \param cr points to the compressed_rank structure
|
||||
* \param cr_packed pointer to the contiguous memory area used to store the compressed_rank structure. The size of cr_packed must be at least @see compressed_rank_packed_size
|
||||
*/
|
||||
void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
|
||||
|
||||
/** \fn cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
|
||||
* \brief Return the amount of space needed to pack a compressed_rank structure.
|
||||
* \return the size of the packed compressed_rank structure or zero for failures
|
||||
*/
|
||||
cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
|
||||
|
||||
|
||||
/** \fn cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
|
||||
* \param cr_packed is a pointer to a contiguous memory area
|
||||
* \param idx is an index to compute the rank
|
||||
* \return an integer that represents the compressed_rank value.
|
||||
*/
|
||||
cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
|
||||
|
||||
#endif
|
@ -10,7 +10,7 @@
|
||||
// #define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
static inline cmph_uint32 i_log2(cmph_uint32 x)
|
||||
static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
|
||||
{
|
||||
register cmph_uint32 res = 0;
|
||||
|
||||
@ -61,7 +61,7 @@ void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cm
|
||||
}
|
||||
else
|
||||
{
|
||||
lengths[i] = i_log2(vals_table[i] + 1);
|
||||
lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
|
||||
cs->total_length += lengths[i];
|
||||
};
|
||||
};
|
||||
@ -82,7 +82,12 @@ void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cm
|
||||
cs->total_length += lengths[i];
|
||||
};
|
||||
|
||||
cs->rem_r = i_log2(cs->total_length/cs->n);
|
||||
cs->rem_r = compressed_seq_i_log2(cs->total_length/cs->n);
|
||||
|
||||
if(cs->rem_r == 0)
|
||||
{
|
||||
cs->rem_r = 1;
|
||||
}
|
||||
|
||||
if(cs->length_rems)
|
||||
{
|
||||
@ -118,7 +123,7 @@ cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
|
||||
return 4 * sizeof(cmph_uint32) * 8 + space_usage;
|
||||
}
|
||||
|
||||
cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
|
||||
cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
|
||||
{
|
||||
register cmph_uint32 enc_idx, enc_length;
|
||||
register cmph_uint32 rems_mask;
|
||||
@ -156,7 +161,7 @@ cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
|
||||
|
||||
void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
|
||||
{
|
||||
register cmph_uint32 sel_size = select_get_space_usage(&cs->sel) >> 3;
|
||||
register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
|
||||
register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
|
||||
register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
|
||||
register cmph_uint32 pos = 0;
|
||||
@ -325,7 +330,7 @@ cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
|
||||
}
|
||||
|
||||
|
||||
cmph_int32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
|
||||
cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
|
||||
{
|
||||
// unpacking cs_packed
|
||||
register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
|
||||
|
@ -38,13 +38,13 @@ void compressed_seq_destroy(compressed_seq_t * cs);
|
||||
void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
|
||||
|
||||
|
||||
/** \fn cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||
/** \fn cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||
* \brief Returns the value stored at index @see idx of the compressed sequence structure.
|
||||
* \param cs points to the compressed sequence structure
|
||||
* \param idx index to retrieve the value from
|
||||
* \return the value stored at index @see idx of the compressed sequence structure
|
||||
*/
|
||||
cmph_int32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||
cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
|
||||
|
||||
|
||||
/** \fn cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
|
||||
@ -73,12 +73,12 @@ void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
|
||||
cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
|
||||
|
||||
|
||||
/** \fn cmph_int32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||
/** \fn cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||
* \brief Returns the value stored at index @see idx of the packed compressed sequence structure.
|
||||
* \param cs_packed is a pointer to a contiguous memory area
|
||||
* \param idx is the index to retrieve the value from
|
||||
* \return the value stored at index @see idx of the packed compressed sequence structure
|
||||
*/
|
||||
cmph_int32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||
cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
|
||||
|
||||
#endif
|
||||
|
22
src/select.c
22
src/select.c
@ -164,7 +164,7 @@ void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph
|
||||
select_generate_sel_table(sel);
|
||||
};
|
||||
|
||||
static inline cmph_int32 _select_query(cmph_uint8 * bits_table, cmph_uint32 * select_table, cmph_uint32 one_idx)
|
||||
static inline cmph_uint32 _select_query(cmph_uint8 * bits_table, cmph_uint32 * select_table, cmph_uint32 one_idx)
|
||||
{
|
||||
register cmph_uint32 vec_bit_idx ,vec_byte_idx;
|
||||
register cmph_uint32 part_sum, old_part_sum;
|
||||
@ -187,13 +187,13 @@ static inline cmph_int32 _select_query(cmph_uint8 * bits_table, cmph_uint32 * se
|
||||
return select_lookup_table[bits_table[vec_byte_idx - 1]][one_idx - old_part_sum] + ((vec_byte_idx-1) << 3);
|
||||
}
|
||||
|
||||
cmph_int32 select_query(select_t * sel, cmph_uint32 one_idx)
|
||||
cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx)
|
||||
{
|
||||
return _select_query((cmph_uint8 *)sel->bits_vec, sel->select_table, one_idx);
|
||||
};
|
||||
|
||||
|
||||
static inline cmph_int32 _select_next_query(cmph_uint8 * bits_table, cmph_uint32 vec_bit_idx)
|
||||
static inline cmph_uint32 _select_next_query(cmph_uint8 * bits_table, cmph_uint32 vec_bit_idx)
|
||||
{
|
||||
register cmph_uint32 vec_byte_idx, one_idx;
|
||||
register cmph_uint32 part_sum, old_part_sum;
|
||||
@ -214,7 +214,7 @@ static inline cmph_int32 _select_next_query(cmph_uint8 * bits_table, cmph_uint32
|
||||
return select_lookup_table[bits_table[(vec_byte_idx - 1)]][(one_idx - old_part_sum)] + ((vec_byte_idx - 1) << 3);
|
||||
}
|
||||
|
||||
cmph_int32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx)
|
||||
cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx)
|
||||
{
|
||||
return _select_next_query((cmph_uint8 *)sel->bits_vec, vec_bit_idx);
|
||||
};
|
||||
@ -315,12 +315,7 @@ cmph_uint32 select_packed_size(select_t *sel)
|
||||
|
||||
|
||||
|
||||
/** \fn cmph_int32 select_query_packed(void * sel_packed, cmph_uint32 idx);
|
||||
* \param sel_packed is a pointer to a contiguous memory area
|
||||
* \param idx is the rank for which we want to calculate the inverse function select
|
||||
* \return an integer that represents the select value of rank idx.
|
||||
*/
|
||||
cmph_int32 select_query_packed(void * sel_packed, cmph_uint32 one_idx)
|
||||
cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx)
|
||||
{
|
||||
register cmph_uint32 *ptr = (cmph_uint32 *)sel_packed;
|
||||
register cmph_uint32 n = *ptr++;
|
||||
@ -334,12 +329,7 @@ cmph_int32 select_query_packed(void * sel_packed, cmph_uint32 one_idx)
|
||||
}
|
||||
|
||||
|
||||
/** \fn cmph_int32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
|
||||
* \param sel_packed is a pointer to a contiguous memory area
|
||||
* \param vec_bit_idx is a value prior computed by @see select_query_packed
|
||||
* \return an integer that represents the next select value greater than @see vec_bit_idx.
|
||||
*/
|
||||
cmph_int32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx)
|
||||
cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx)
|
||||
{
|
||||
register cmph_uint8 * bits_vec = (cmph_uint8 *)sel_packed;
|
||||
bits_vec += 8; // skipping n and m
|
||||
|
12
src/select.h
12
src/select.h
@ -18,9 +18,9 @@ void select_destroy(select_t * sel);
|
||||
|
||||
void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m);
|
||||
|
||||
cmph_int32 select_query(select_t * sel, cmph_uint32 one_idx);
|
||||
cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx);
|
||||
|
||||
cmph_int32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx);
|
||||
cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx);
|
||||
|
||||
cmph_uint32 select_get_space_usage(select_t * sel);
|
||||
|
||||
@ -43,19 +43,19 @@ void select_pack(select_t *sel, void *sel_packed);
|
||||
cmph_uint32 select_packed_size(select_t *sel);
|
||||
|
||||
|
||||
/** \fn cmph_int32 select_query_packed(void * sel_packed, cmph_uint32 idx);
|
||||
/** \fn cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
|
||||
* \param sel_packed is a pointer to a contiguous memory area
|
||||
* \param one_idx is the rank for which we want to calculate the inverse function select
|
||||
* \return an integer that represents the select value of rank idx.
|
||||
*/
|
||||
cmph_int32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
|
||||
cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
|
||||
|
||||
|
||||
/** \fn cmph_int32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
|
||||
/** \fn cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
|
||||
* \param sel_packed is a pointer to a contiguous memory area
|
||||
* \param vec_bit_idx is a value prior computed by @see select_query_packed
|
||||
* \return an integer that represents the next select value greater than @see vec_bit_idx.
|
||||
*/
|
||||
cmph_int32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
|
||||
cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
|
||||
|
||||
#endif
|
||||
|
@ -1,4 +1,4 @@
|
||||
noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests select_tests compressed_seq_tests
|
||||
noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests select_tests compressed_seq_tests compressed_rank_tests
|
||||
|
||||
INCLUDES = -I../src/
|
||||
|
||||
@ -16,3 +16,6 @@ select_tests_LDADD = ../src/libcmph.la
|
||||
|
||||
compressed_seq_tests_SOURCES = compressed_seq_tests.c
|
||||
compressed_seq_tests_LDADD = ../src/libcmph.la
|
||||
|
||||
compressed_rank_tests_SOURCES = compressed_rank_tests.c
|
||||
compressed_rank_tests_LDADD = ../src/libcmph.la
|
||||
|
@ -27,7 +27,7 @@ int main(int argc, char **argv)
|
||||
cmph_uint32 i = 0;
|
||||
cmph_uint32 n = 20;
|
||||
cmph_uint32 keys_vec[] = { 0, 1, 1, 1, 2, 2, 2, 3, 5, 5,
|
||||
6, 6, 9, 9, 9, 12, 12, 13, 17, 10017};
|
||||
6, 6, 9, 9, 9, 12, 12, 13, 17, 1077};
|
||||
char *buf = NULL;
|
||||
cmph_uint32 buflen = 0;
|
||||
char * cs_packed = NULL;
|
||||
|
@ -154,6 +154,12 @@ int main(int argc, char **argv)
|
||||
|
||||
// testing the packed function
|
||||
//check all keys
|
||||
#ifdef CMPH_TIMING
|
||||
double evaluation_time_begin = 0.0;
|
||||
double evaluation_time = 0.0;
|
||||
ELAPSED_TIME_IN_SECONDS(&evaluation_time_begin);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < source->nkeys; ++i)
|
||||
{
|
||||
cmph_uint32 h;
|
||||
@ -179,6 +185,12 @@ int main(int argc, char **argv)
|
||||
}
|
||||
source->dispose(source->data, buf, buflen);
|
||||
}
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&evaluation_time);
|
||||
evaluation_time = evaluation_time - evaluation_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\n", source->nkeys, evaluation_time);
|
||||
#endif
|
||||
|
||||
free(packed_mphf);
|
||||
cmph_destroy(mphf);
|
||||
free(hashtable);
|
||||
|
Loading…
Reference in New Issue
Block a user