BRZ now works with either FCH or BMZ8 as the per-bucket algorithm. BMZ8 is faster, but the MPHFs it generates for each bucket are larger.
This commit is contained in:
parent
c9edcadd8f
commit
8adabac1c9
@ -66,6 +66,7 @@ cmph_t *bmz_new(cmph_config_t *mph, float c)
|
|||||||
cmph_uint8 * visited = NULL;
|
cmph_uint8 * visited = NULL;
|
||||||
|
|
||||||
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
|
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
|
||||||
|
if (c == 0) c = 1.15; // validating restrictions over parameter c.
|
||||||
DEBUGP("c: %f\n", c);
|
DEBUGP("c: %f\n", c);
|
||||||
bmz->m = mph->key_source->nkeys;
|
bmz->m = mph->key_source->nkeys;
|
||||||
bmz->n = ceil(c * mph->key_source->nkeys);
|
bmz->n = ceil(c * mph->key_source->nkeys);
|
||||||
|
@ -70,7 +70,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c)
|
|||||||
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
|
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
if (c == 0) c = 1.15; // validating restrictions over parameter c.
|
||||||
DEBUGP("c: %f\n", c);
|
DEBUGP("c: %f\n", c);
|
||||||
bmz8->m = mph->key_source->nkeys;
|
bmz8->m = mph->key_source->nkeys;
|
||||||
bmz8->n = ceil(c * mph->key_source->nkeys);
|
bmz8->n = ceil(c * mph->key_source->nkeys);
|
||||||
|
146
src/brz.c
146
src/brz.c
@ -1,4 +1,6 @@
|
|||||||
#include "graph.h"
|
#include "graph.h"
|
||||||
|
#include "fch.h"
|
||||||
|
#include "fch_structs.h"
|
||||||
#include "bmz8.h"
|
#include "bmz8.h"
|
||||||
#include "bmz8_structs.h"
|
#include "bmz8_structs.h"
|
||||||
#include "brz.h"
|
#include "brz.h"
|
||||||
@ -20,11 +22,13 @@
|
|||||||
static int brz_gen_mphf(cmph_config_t *mph);
|
static int brz_gen_mphf(cmph_config_t *mph);
|
||||||
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
|
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
|
||||||
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys);
|
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys);
|
||||||
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
|
static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fchf, cmph_uint32 index, cmph_uint32 *buflen);
|
||||||
|
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
|
||||||
brz_config_data_t *brz_config_new()
|
brz_config_data_t *brz_config_new()
|
||||||
{
|
{
|
||||||
brz_config_data_t *brz = NULL;
|
brz_config_data_t *brz = NULL;
|
||||||
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
|
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
|
||||||
|
brz->algo = CMPH_BMZ8;
|
||||||
brz->b = 128;
|
brz->b = 128;
|
||||||
brz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
brz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
brz->hashfuncs[1] = CMPH_HASH_JENKINS;
|
brz->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
@ -111,8 +115,17 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
|
|||||||
cmph_uint32 i;
|
cmph_uint32 i;
|
||||||
cmph_uint32 iterations = 20;
|
cmph_uint32 iterations = 20;
|
||||||
|
|
||||||
DEBUGP("c: %f\n");
|
DEBUGP("c: %f\n", c);
|
||||||
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
||||||
|
switch(brz->algo) // validating restrictions over parameter c.
|
||||||
|
{
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
if (c == 0 || c >= 2.0) c = 1;
|
||||||
|
break;
|
||||||
|
case CMPH_FCH:
|
||||||
|
if (c <= 2.0) c = 2.6;
|
||||||
|
break;
|
||||||
|
}
|
||||||
brz->c = c;
|
brz->c = c;
|
||||||
brz->m = mph->key_source->nkeys;
|
brz->m = mph->key_source->nkeys;
|
||||||
DEBUGP("m: %u\n", brz->m);
|
DEBUGP("m: %u\n", brz->m);
|
||||||
@ -179,6 +192,7 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
|
|||||||
brzf->k = brz->k;
|
brzf->k = brz->k;
|
||||||
brzf->c = brz->c;
|
brzf->c = brz->c;
|
||||||
brzf->m = brz->m;
|
brzf->m = brz->m;
|
||||||
|
brzf->algo = brz->algo;
|
||||||
mphf->data = brzf;
|
mphf->data = brzf;
|
||||||
mphf->size = brz->m;
|
mphf->size = brz->m;
|
||||||
DEBUGP("Successfully generated minimal perfect hash\n");
|
DEBUGP("Successfully generated minimal perfect hash\n");
|
||||||
@ -191,7 +205,7 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
|
|||||||
|
|
||||||
static int brz_gen_mphf(cmph_config_t *mph)
|
static int brz_gen_mphf(cmph_config_t *mph)
|
||||||
{
|
{
|
||||||
cmph_uint32 i, e;
|
cmph_uint32 i, e, error;
|
||||||
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
||||||
cmph_uint32 memory_usage = 0;
|
cmph_uint32 memory_usage = 0;
|
||||||
cmph_uint32 nkeys_in_buffer = 0;
|
cmph_uint32 nkeys_in_buffer = 0;
|
||||||
@ -269,7 +283,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
memory_usage += keylen + sizeof(keylen);
|
memory_usage += keylen + sizeof(keylen);
|
||||||
h0 = hash(brz->h0, key, keylen) % brz->k;
|
h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||||
|
|
||||||
if ((brz->size[h0] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))
|
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
|
||||||
{
|
{
|
||||||
free(buffer);
|
free(buffer);
|
||||||
free(buckets_size);
|
free(buckets_size);
|
||||||
@ -337,6 +351,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
fwrite(cmph_names[CMPH_BRZ], (cmph_uint32)(strlen(cmph_names[CMPH_BRZ]) + 1), 1, brz->mphf_fd);
|
fwrite(cmph_names[CMPH_BRZ], (cmph_uint32)(strlen(cmph_names[CMPH_BRZ]) + 1), 1, brz->mphf_fd);
|
||||||
fwrite(&(brz->m), sizeof(brz->m), 1, brz->mphf_fd);
|
fwrite(&(brz->m), sizeof(brz->m), 1, brz->mphf_fd);
|
||||||
fwrite(&(brz->c), sizeof(cmph_float32), 1, brz->mphf_fd);
|
fwrite(&(brz->c), sizeof(cmph_float32), 1, brz->mphf_fd);
|
||||||
|
fwrite(&(brz->algo), sizeof(brz->algo), 1, brz->mphf_fd);
|
||||||
fwrite(&(brz->k), sizeof(cmph_uint32), 1, brz->mphf_fd); // number of MPHFs
|
fwrite(&(brz->k), sizeof(cmph_uint32), 1, brz->mphf_fd); // number of MPHFs
|
||||||
fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), 1, brz->mphf_fd);
|
fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), 1, brz->mphf_fd);
|
||||||
|
|
||||||
@ -362,6 +377,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
e = 0;
|
e = 0;
|
||||||
keys_vd = (cmph_uint8 **)calloc(MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
|
keys_vd = (cmph_uint8 **)calloc(MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
|
||||||
nkeys_vd = 0;
|
nkeys_vd = 0;
|
||||||
|
error = 0;
|
||||||
while(e < brz->m)
|
while(e < brz->m)
|
||||||
{
|
{
|
||||||
i = brz_min_index(buffer_h0, nflushes);
|
i = brz_min_index(buffer_h0, nflushes);
|
||||||
@ -403,18 +419,49 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
cmph_io_adapter_t *source = NULL;
|
cmph_io_adapter_t *source = NULL;
|
||||||
cmph_config_t *config = NULL;
|
cmph_config_t *config = NULL;
|
||||||
cmph_t *mphf_tmp = NULL;
|
cmph_t *mphf_tmp = NULL;
|
||||||
bmz8_data_t * bmzf = NULL;
|
|
||||||
char *bufmphf = NULL;
|
char *bufmphf = NULL;
|
||||||
cmph_uint32 buflenmphf = 0;
|
cmph_uint32 buflenmphf = 0;
|
||||||
// Source of keys
|
// Source of keys
|
||||||
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
||||||
config = cmph_config_new(source);
|
config = cmph_config_new(source);
|
||||||
cmph_config_set_algo(config, CMPH_BMZ8);
|
cmph_config_set_algo(config, brz->algo);
|
||||||
|
//cmph_config_set_algo(config, CMPH_BMZ8);
|
||||||
cmph_config_set_graphsize(config, brz->c);
|
cmph_config_set_graphsize(config, brz->c);
|
||||||
mphf_tmp = cmph_new(config);
|
mphf_tmp = cmph_new(config);
|
||||||
bmzf = (bmz8_data_t *)mphf_tmp->data;
|
if (mphf_tmp == NULL)
|
||||||
bufmphf = brz_copy_partial_mphf(brz, bmzf, cur_bucket, &buflenmphf);
|
{
|
||||||
bmzf = NULL;
|
if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k);
|
||||||
|
error = 1;
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
brz_destroy_keys_vd(keys_vd, nkeys_vd);
|
||||||
|
cmph_io_byte_vector_adapter_destroy(source);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(mph->verbosity)
|
||||||
|
{
|
||||||
|
if (cur_bucket % 1000 == 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch(brz->algo)
|
||||||
|
{
|
||||||
|
case CMPH_FCH:
|
||||||
|
{
|
||||||
|
fch_data_t * fchf = NULL;
|
||||||
|
fchf = (fch_data_t *)mphf_tmp->data;
|
||||||
|
bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
{
|
||||||
|
bmz8_data_t * bmzf = NULL;
|
||||||
|
bmzf = (bmz8_data_t *)mphf_tmp->data;
|
||||||
|
bufmphf = brz_copy_partial_bmz8_mphf(brz, bmzf, cur_bucket, &buflenmphf);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
fwrite(bufmphf, buflenmphf, 1, brz->mphf_fd);
|
fwrite(bufmphf, buflenmphf, 1, brz->mphf_fd);
|
||||||
free(bufmphf);
|
free(bufmphf);
|
||||||
bufmphf = NULL;
|
bufmphf = NULL;
|
||||||
@ -425,11 +472,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
nkeys_vd = 0;
|
nkeys_vd = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer_manager_destroy(buff_manager);
|
buffer_manager_destroy(buff_manager);
|
||||||
free(keys_vd);
|
free(keys_vd);
|
||||||
free(buffer_merge);
|
free(buffer_merge);
|
||||||
free(buffer_h0);
|
free(buffer_h0);
|
||||||
|
if (error) return 0;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -449,7 +496,29 @@ static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys)
|
|||||||
for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;}
|
for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;}
|
||||||
}
|
}
|
||||||
|
|
||||||
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
|
/*
 * Serializes the FCH function built for one bucket into a single heap buffer.
 *
 * Buffer layout, in order:
 *   - length of the dumped h1 state (cmph_uint32)
 *   - dumped h1 state
 *   - length of the dumped h2 state (cmph_uint32)
 *   - dumped h2 state
 *   - fchf->b bytes, one taken from each of the first fchf->b elements of fchf->g
 *
 * brz    - not read by this function.
 * fchf   - FCH data whose h1, h2, b and g fields are serialized.
 * index  - not read by this function.
 * buflen - receives the total size of the returned buffer, in bytes.
 *
 * Returns the malloc'd buffer; the caller is expected to free() it.
 *
 * NOTE(review): the malloc() result is not checked before being written to.
 */
static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fchf, cmph_uint32 index, cmph_uint32 *buflen)
{
    char * h1_dump = NULL;
    char * h2_dump = NULL;
    cmph_uint32 h1_len = 0;
    cmph_uint32 h2_len = 0;
    cmph_uint32 g_len = fchf->b;
    cmph_uint32 pos = 0;
    char * out = NULL;
    char * cursor = NULL;

    /* The dumps are released with free() below, so they are presumably
       heap-allocated by hash_state_dump -- TODO confirm. */
    hash_state_dump(fchf->h1, &h1_dump, &h1_len);
    hash_state_dump(fchf->h2, &h2_dump, &h2_len);

    *buflen = h1_len + h2_len + g_len + 2*sizeof(cmph_uint32);
    out = (char *)malloc(*buflen);
    cursor = out;

    /* h1: length prefix followed by the state bytes. */
    memcpy(cursor, &h1_len, sizeof(cmph_uint32));
    cursor += sizeof(cmph_uint32);
    memcpy(cursor, h1_dump, h1_len);
    cursor += h1_len;

    /* h2: length prefix followed by the state bytes. */
    memcpy(cursor, &h2_len, sizeof(cmph_uint32));
    cursor += sizeof(cmph_uint32);
    memcpy(cursor, h2_dump, h2_len);
    cursor += h2_len;

    /* Exactly one byte is copied per g element (the first byte at the
       element's address). NOTE(review): if g's element type is wider than
       one byte, this truncates and depends on byte order -- confirm
       against fch_data_t. */
    for (pos = 0; pos < g_len; pos++)
    {
        memcpy(cursor + pos, (fchf->g + pos), 1);
    }

    free(h1_dump);
    free(h2_dump);
    return out;
}
|
||||||
|
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
cmph_uint32 buflenh1 = 0;
|
cmph_uint32 buflenh1 = 0;
|
||||||
cmph_uint32 buflenh2 = 0;
|
cmph_uint32 buflenh2 = 0;
|
||||||
@ -470,6 +539,8 @@ static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf,
|
|||||||
free(bufh2);
|
free(bufh2);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int brz_dump(cmph_t *mphf, FILE *fd)
|
int brz_dump(cmph_t *mphf, FILE *fd)
|
||||||
{
|
{
|
||||||
brz_data_t *data = (brz_data_t *)mphf->data;
|
brz_data_t *data = (brz_data_t *)mphf->data;
|
||||||
@ -489,9 +560,6 @@ int brz_dump(cmph_t *mphf, FILE *fd)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void brz_load(FILE *f, cmph_t *mphf)
|
void brz_load(FILE *f, cmph_t *mphf)
|
||||||
{
|
{
|
||||||
char *buf = NULL;
|
char *buf = NULL;
|
||||||
@ -502,31 +570,41 @@ void brz_load(FILE *f, cmph_t *mphf)
|
|||||||
DEBUGP("Loading brz mphf\n");
|
DEBUGP("Loading brz mphf\n");
|
||||||
mphf->data = brz;
|
mphf->data = brz;
|
||||||
fread(&(brz->c), sizeof(cmph_float32), 1, f);
|
fread(&(brz->c), sizeof(cmph_float32), 1, f);
|
||||||
|
fread(&(brz->algo), sizeof(brz->algo), 1, f); // Reading algo.
|
||||||
fread(&(brz->k), sizeof(cmph_uint32), 1, f);
|
fread(&(brz->k), sizeof(cmph_uint32), 1, f);
|
||||||
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
|
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
|
||||||
fread(brz->size, sizeof(cmph_uint8)*(brz->k), 1, f);
|
fread(brz->size, sizeof(cmph_uint8)*(brz->k), 1, f);
|
||||||
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
|
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
|
||||||
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
|
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
|
||||||
brz->g = (cmph_uint8 **) calloc(brz->k, sizeof(cmph_uint8 *));
|
brz->g = (cmph_uint8 **) calloc(brz->k, sizeof(cmph_uint8 *));
|
||||||
DEBUGP("Reading %u h1 and %u h2\n", brz->k, brz->k);
|
DEBUGP("Reading c = %f k = %u algo = %u \n", brz->c, brz->k, brz->algo);
|
||||||
//loading h_i1, h_i2 and g_i.
|
//loading h_i1, h_i2 and g_i.
|
||||||
for(i = 0; i < brz->k; i++)
|
for(i = 0; i < brz->k; i++)
|
||||||
{
|
{
|
||||||
// h1
|
// h1
|
||||||
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
||||||
DEBUGP("Hash state has %u bytes\n", buflen);
|
DEBUGP("Hash state 1 has %u bytes\n", buflen);
|
||||||
buf = (char *)malloc(buflen);
|
buf = (char *)malloc(buflen);
|
||||||
fread(buf, buflen, 1, f);
|
fread(buf, buflen, 1, f);
|
||||||
brz->h1[i] = hash_state_load(buf, buflen);
|
brz->h1[i] = hash_state_load(buf, buflen);
|
||||||
free(buf);
|
free(buf);
|
||||||
//h2
|
//h2
|
||||||
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
||||||
DEBUGP("Hash state has %u bytes\n", buflen);
|
DEBUGP("Hash state 2 has %u bytes\n", buflen);
|
||||||
buf = (char *)malloc(buflen);
|
buf = (char *)malloc(buflen);
|
||||||
fread(buf, buflen, 1, f);
|
fread(buf, buflen, 1, f);
|
||||||
brz->h2[i] = hash_state_load(buf, buflen);
|
brz->h2[i] = hash_state_load(buf, buflen);
|
||||||
free(buf);
|
free(buf);
|
||||||
n = ceil(brz->c * brz->size[i]);
|
switch(brz->algo)
|
||||||
|
{
|
||||||
|
case CMPH_FCH:
|
||||||
|
n = fch_calc_b(brz->c, brz->size[i]);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8:
|
||||||
|
n = ceil(brz->c * brz->size[i]);
|
||||||
|
break;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
DEBUGP("g_i has %u bytes\n", n);
|
DEBUGP("g_i has %u bytes\n", n);
|
||||||
brz->g[i] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8));
|
brz->g[i] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8));
|
||||||
fread(brz->g[i], sizeof(cmph_uint8)*n, 1, f);
|
fread(brz->g[i], sizeof(cmph_uint8)*n, 1, f);
|
||||||
@ -546,9 +624,8 @@ void brz_load(FILE *f, cmph_t *mphf)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
brz_data_t *brz = mphf->data;
|
|
||||||
cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
|
cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||||
cmph_uint32 m = brz->size[h0];
|
cmph_uint32 m = brz->size[h0];
|
||||||
cmph_uint32 n = ceil(brz->c * m);
|
cmph_uint32 n = ceil(brz->c * m);
|
||||||
@ -562,6 +639,35 @@ cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
|||||||
DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
|
DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
|
||||||
return (mphf_bucket + brz->offset[h0]);
|
return (mphf_bucket + brz->offset[h0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Looks up a key in a BRZ function whose per-bucket functions are FCH.
 *
 * The bucket is chosen with h0 modulo brz->k. The FCH parameters b, p1 and
 * p2 are recomputed from brz->c and the bucket size rather than stored.
 * The position inside the bucket is (h2 + g[bucket][mixed h1]) % bucket size,
 * and the result is that position plus brz->offset[bucket].
 *
 * NOTE(review): both hashes are reduced modulo the bucket size, so a bucket
 * with size 0 would divide by zero -- confirm that buckets cannot be empty.
 */
static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen)
{
    cmph_uint32 bucket;
    cmph_uint32 bucket_size;
    cmph_uint32 fch_b;
    cmph_float32 fch_p1;
    cmph_float32 fch_p2;
    cmph_uint32 idx1;
    cmph_uint32 idx2;
    cmph_uint8 slot = 0;

    bucket = hash(brz->h0, key, keylen) % brz->k;
    bucket_size = brz->size[bucket];

    /* Per-bucket FCH parameters derived from c and the bucket size. */
    fch_b = fch_calc_b(brz->c, bucket_size);
    fch_p1 = fch_calc_p1(bucket_size);
    fch_p2 = fch_calc_p2(fch_b);

    idx1 = hash(brz->h1[bucket], key, keylen) % bucket_size;
    idx2 = hash(brz->h2[bucket], key, keylen) % bucket_size;
    idx1 = mixh10h11h12(fch_b, fch_p1, fch_p2, idx1);

    /* The in-bucket position is kept in a cmph_uint8, as in the original. */
    slot = (idx2 + brz->g[bucket][idx1]) % bucket_size;
    return (slot + brz->offset[bucket]);
}
|
||||||
|
|
||||||
|
/*
 * Public BRZ lookup: dispatches to the search routine that matches the
 * per-bucket algorithm recorded in the function's data (brz->algo).
 *
 * mphf   - function whose data field holds a brz_data_t.
 * key    - key bytes to look up.
 * keylen - length of key.
 *
 * Returns the value produced by the FCH or BMZ8 search routine. Any other
 * algo value trips assert(0); if assertions are compiled out, 0 is returned.
 */
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
    brz_data_t *data = mphf->data;

    if (data->algo == CMPH_FCH)
    {
        return brz_fch_search(data, key, keylen);
    }
    if (data->algo == CMPH_BMZ8)
    {
        return brz_bmz8_search(data, key, keylen);
    }

    /* Only FCH and BMZ8 are handled as per-bucket algorithms. */
    assert(0);
    return 0;
}
|
||||||
void brz_destroy(cmph_t *mphf)
|
void brz_destroy(cmph_t *mphf)
|
||||||
{
|
{
|
||||||
cmph_uint32 i;
|
cmph_uint32 i;
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
struct __brz_data_t
|
struct __brz_data_t
|
||||||
{
|
{
|
||||||
|
CMPH_ALGO algo; // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8)
|
||||||
cmph_uint32 m; // edges (words) count
|
cmph_uint32 m; // edges (words) count
|
||||||
cmph_float32 c; // constant c
|
cmph_float32 c; // constant c
|
||||||
cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
|
cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
|
||||||
@ -19,6 +20,7 @@ struct __brz_data_t
|
|||||||
struct __brz_config_data_t
|
struct __brz_config_data_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfuncs[3];
|
CMPH_HASH hashfuncs[3];
|
||||||
|
CMPH_ALGO algo; // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8)
|
||||||
cmph_float32 c; // constant c
|
cmph_float32 c; // constant c
|
||||||
cmph_uint32 m; // edges (words) count
|
cmph_uint32 m; // edges (words) count
|
||||||
cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
|
cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
|
||||||
|
@ -60,6 +60,7 @@ cmph_t *chm_new(cmph_config_t *mph, float c)
|
|||||||
cmph_uint8 *visited = NULL;
|
cmph_uint8 *visited = NULL;
|
||||||
chm_config_data_t *chm = (chm_config_data_t *)mph->data;
|
chm_config_data_t *chm = (chm_config_data_t *)mph->data;
|
||||||
chm->m = mph->key_source->nkeys;
|
chm->m = mph->key_source->nkeys;
|
||||||
|
if (c == 0) c = 2.09;
|
||||||
chm->n = ceil(c * mph->key_source->nkeys);
|
chm->n = ceil(c * mph->key_source->nkeys);
|
||||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
|
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
|
||||||
chm->graph = graph_new(chm->n, chm->m);
|
chm->graph = graph_new(chm->n, chm->m);
|
||||||
|
@ -354,27 +354,22 @@ cmph_t *cmph_new(cmph_config_t *mph)
|
|||||||
{
|
{
|
||||||
case CMPH_CHM:
|
case CMPH_CHM:
|
||||||
DEBUGP("Creating chm hash\n");
|
DEBUGP("Creating chm hash\n");
|
||||||
if (c == 0) c = 2.09;
|
|
||||||
mphf = chm_new(mph, c);
|
mphf = chm_new(mph, c);
|
||||||
break;
|
break;
|
||||||
case CMPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
DEBUGP("Creating bmz hash\n");
|
DEBUGP("Creating bmz hash\n");
|
||||||
if (c == 0) c = 1.15;
|
|
||||||
mphf = bmz_new(mph, c);
|
mphf = bmz_new(mph, c);
|
||||||
break;
|
break;
|
||||||
case CMPH_BMZ8: /* included -- Fabiano */
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
DEBUGP("Creating bmz8 hash\n");
|
DEBUGP("Creating bmz8 hash\n");
|
||||||
if (c == 0) c = 1.15;
|
|
||||||
mphf = bmz8_new(mph, c);
|
mphf = bmz8_new(mph, c);
|
||||||
break;
|
break;
|
||||||
case CMPH_BRZ: /* included -- Fabiano */
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
DEBUGP("Creating brz hash\n");
|
DEBUGP("Creating brz hash\n");
|
||||||
if (c == 0) c = 1.15;
|
|
||||||
mphf = brz_new(mph, c);
|
mphf = brz_new(mph, c);
|
||||||
break;
|
break;
|
||||||
case CMPH_FCH: /* included -- Fabiano */
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
DEBUGP("Creating fch hash\n");
|
DEBUGP("Creating fch hash\n");
|
||||||
if (c <= 2) c = 2.6;
|
|
||||||
mphf = fch_new(mph, c);
|
mphf = fch_new(mph, c);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
27
src/fch.c
27
src/fch.c
@ -13,8 +13,6 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
static cmph_uint32 mixh10h11h12(cmph_uint32 b, cmph_float32 p1, cmph_float32 p2, cmph_uint32 initial_index);
|
|
||||||
static void calc_parameters(fch_config_data_t *fch);
|
|
||||||
static fch_buckets_t * mapping(cmph_config_t *mph);
|
static fch_buckets_t * mapping(cmph_config_t *mph);
|
||||||
static cmph_uint32 * ordering(fch_buckets_t * buckets);
|
static cmph_uint32 * ordering(fch_buckets_t * buckets);
|
||||||
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes);
|
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes);
|
||||||
@ -57,7 +55,7 @@ void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static cmph_uint32 mixh10h11h12(cmph_uint32 b, cmph_float32 p1, cmph_float32 p2, cmph_uint32 initial_index)
|
cmph_uint32 mixh10h11h12(cmph_uint32 b, cmph_float32 p1, cmph_float32 p2, cmph_uint32 initial_index)
|
||||||
{
|
{
|
||||||
if (initial_index < p1) initial_index %= (cmph_uint32)p2; /* h11 o h10 */
|
if (initial_index < p1) initial_index %= (cmph_uint32)p2; /* h11 o h10 */
|
||||||
else { /* h12 o h10 */
|
else { /* h12 o h10 */
|
||||||
@ -67,11 +65,20 @@ static cmph_uint32 mixh10h11h12(cmph_uint32 b, cmph_float32 p1, cmph_float32 p2,
|
|||||||
return initial_index;
|
return initial_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void calc_parameters(fch_config_data_t *fch)
|
|
||||||
|
cmph_uint32 fch_calc_b(cmph_float32 c, cmph_uint32 m)
|
||||||
{
|
{
|
||||||
fch->b = (cmph_uint32)ceil((fch->c*fch->m)/(log(fch->m)/log(2) + 1));
|
return (cmph_uint32)ceil((c*m)/(log(m)/log(2) + 1));
|
||||||
fch->p1 = ceil(0.55*fch->m);
|
}
|
||||||
fch->p2 = ceil(0.3*fch->b);
|
|
||||||
|
/*
 * Computes the FCH parameter p1 for m keys.
 *
 * Returns ceil(0.55 * m), converted to cmph_float32.
 */
cmph_float32 fch_calc_p1(cmph_uint32 m)
{
    const double scaled = 0.55 * m;
    return ceil(scaled);
}
|
||||||
|
|
||||||
|
cmph_float32 fch_calc_p2(cmph_uint32 b)
|
||||||
|
{
|
||||||
|
return ceil(0.3*b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static fch_buckets_t * mapping(cmph_config_t *mph)
|
static fch_buckets_t * mapping(cmph_config_t *mph)
|
||||||
@ -81,7 +88,9 @@ static fch_buckets_t * mapping(cmph_config_t *mph)
|
|||||||
fch_config_data_t *fch = (fch_config_data_t *)mph->data;
|
fch_config_data_t *fch = (fch_config_data_t *)mph->data;
|
||||||
if (fch->h1) hash_state_destroy(fch->h1);
|
if (fch->h1) hash_state_destroy(fch->h1);
|
||||||
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
|
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
|
||||||
calc_parameters (fch);
|
fch->b = fch_calc_b(fch->c, fch->m);
|
||||||
|
fch->p1 = fch_calc_p1(fch->m);
|
||||||
|
fch->p2 = fch_calc_p2(fch->b);
|
||||||
//DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2);
|
//DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2);
|
||||||
buckets = fch_buckets_new(fch->b);
|
buckets = fch_buckets_new(fch->b);
|
||||||
|
|
||||||
@ -247,6 +256,7 @@ cmph_t *fch_new(cmph_config_t *mph, float c)
|
|||||||
fch_config_data_t *fch = (fch_config_data_t *)mph->data;
|
fch_config_data_t *fch = (fch_config_data_t *)mph->data;
|
||||||
fch->m = mph->key_source->nkeys;
|
fch->m = mph->key_source->nkeys;
|
||||||
//DEBUGP("m: %f\n", fch->m);
|
//DEBUGP("m: %f\n", fch->m);
|
||||||
|
if (c <= 2) c = 2.6; // validating restrictions over parameter c.
|
||||||
fch->c = c;
|
fch->c = c;
|
||||||
//DEBUGP("c: %f\n", fch->c);
|
//DEBUGP("c: %f\n", fch->c);
|
||||||
fch->h1 = NULL;
|
fch->h1 = NULL;
|
||||||
@ -389,7 +399,6 @@ cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
|||||||
fch_data_t *fch = mphf->data;
|
fch_data_t *fch = mphf->data;
|
||||||
cmph_uint32 h1 = hash(fch->h1, key, keylen) % fch->m;
|
cmph_uint32 h1 = hash(fch->h1, key, keylen) % fch->m;
|
||||||
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
|
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
|
||||||
h1 = hash(fch->h1, key, keylen) % fch->m;
|
|
||||||
h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
|
h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
|
||||||
//DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, h1, h2, fch->g[h1]);
|
//DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, h1, h2, fch->g[h1]);
|
||||||
return (h2 + fch->g[h1]) % fch->m;
|
return (h2 + fch->g[h1]) % fch->m;
|
||||||
|
@ -6,6 +6,12 @@
|
|||||||
typedef struct __fch_data_t fch_data_t;
|
typedef struct __fch_data_t fch_data_t;
|
||||||
typedef struct __fch_config_data_t fch_config_data_t;
|
typedef struct __fch_config_data_t fch_config_data_t;
|
||||||
|
|
||||||
|
/* Parameters calculation */
|
||||||
|
cmph_uint32 fch_calc_b(cmph_float32 c, cmph_uint32 m);
|
||||||
|
cmph_float32 fch_calc_p1(cmph_uint32 m);
|
||||||
|
cmph_float32 fch_calc_p2(cmph_uint32 b);
|
||||||
|
cmph_uint32 mixh10h11h12(cmph_uint32 b, cmph_float32 p1, cmph_float32 p2, cmph_uint32 initial_index);
|
||||||
|
|
||||||
fch_config_data_t *fch_config_new();
|
fch_config_data_t *fch_config_new();
|
||||||
void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||||
void fch_config_destroy(cmph_config_t *mph);
|
void fch_config_destroy(cmph_config_t *mph);
|
||||||
|
@ -217,7 +217,6 @@ int main(int argc, char **argv)
|
|||||||
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
||||||
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
|
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
|
||||||
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
|
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
|
||||||
|
|
||||||
if (generate)
|
if (generate)
|
||||||
{
|
{
|
||||||
//Create mphf
|
//Create mphf
|
||||||
@ -230,7 +229,8 @@ int main(int argc, char **argv)
|
|||||||
cmph_config_set_mphf_fd(config, mphf_fd);
|
cmph_config_set_mphf_fd(config, mphf_fd);
|
||||||
cmph_config_set_memory_availability(config, memory_availability);
|
cmph_config_set_memory_availability(config, memory_availability);
|
||||||
cmph_config_set_b(config, b);
|
cmph_config_set_b(config, b);
|
||||||
if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
|
//if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
|
||||||
|
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
|
||||||
if (c != 0) cmph_config_set_graphsize(config, c);
|
if (c != 0) cmph_config_set_graphsize(config, c);
|
||||||
mphf = cmph_new(config);
|
mphf = cmph_new(config);
|
||||||
cmph_config_destroy(config);
|
cmph_config_destroy(config);
|
||||||
|
Loading…
Reference in New Issue
Block a user