stable version of BRZ algorithm using buffers

This commit is contained in:
fc_botelho 2006-01-25 19:45:14 +00:00
parent 2fd4b6a46d
commit ab38f8e13f
13 changed files with 398 additions and 327 deletions

View File

@ -1,7 +1,7 @@
bin_PROGRAMS = cmph
lib_LTLIBRARIES = libcmph.la
include_HEADERS = cmph.h cmph_types.h
libcmph_la_SOURCES = debug.h\
libcmph_la_SOURCES = util.h debug.h\
bitbool.h bitbool.c\
cmph_types.h\
hash.h hash_state.h hash.c\
@ -17,9 +17,11 @@ libcmph_la_SOURCES = debug.h\
chm.h chm_structs.h chm.c\
bmz.h bmz_structs.h bmz.c\
bmz8.h bmz8_structs.h bmz8.c\
buffer_manage.h buffer_manage.c\
buffer_entry.h buffer_entry.c\
brz.h brz_structs.h brz.c
libcmph_la_LDFLAGS = -version-info 0:0:0
cmph_SOURCES = main.c ../wingetopt.h ../wingetopt.c
cmph_SOURCES = main.c wingetopt.h wingetopt.c
cmph_LDADD = libcmph.la

View File

@ -64,7 +64,7 @@ cmph_t *bmz_new(cmph_config_t *mph, float c)
cmph_uint8 *used_edges = NULL;
cmph_uint8 restart_mapping = 0;
cmph_uint8 * visited = NULL;
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
DEBUGP("c: %f\n", c);
bmz->m = mph->key_source->nkeys;
@ -93,7 +93,7 @@ cmph_t *bmz_new(cmph_config_t *mph, float c)
bmz->hashes[0] = hash_state_new(bmz->hashfuncs[0], bmz->n);
DEBUGP("hash function 2\n");
bmz->hashes[1] = hash_state_new(bmz->hashfuncs[1], bmz->n);
DEBUGP("Generating edges\n");
DEBUGP("Generating edges\n");
ok = bmz_gen_edges(mph);
if (!ok)
{
@ -109,20 +109,18 @@ cmph_t *bmz_new(cmph_config_t *mph, float c)
}
if (iterations == 0) break;
}
else break;
else break;
}
if (iterations == 0)
{
graph_destroy(bmz->graph);
return NULL;
}
// Ordering step
if (mph->verbosity)
{
fprintf(stderr, "Starting ordering step\n");
}
graph_obtain_critical_nodes(bmz->graph);
// Searching step
@ -164,7 +162,7 @@ cmph_t *bmz_new(cmph_config_t *mph, float c)
free(used_edges);
free(visited);
}while(restart_mapping && iterations_map > 0);
graph_destroy(bmz->graph);
graph_destroy(bmz->graph);
bmz->graph = NULL;
if (iterations_map == 0)
{
@ -181,6 +179,7 @@ cmph_t *bmz_new(cmph_config_t *mph, float c)
bmzf->m = bmz->m;
mphf->data = bmzf;
mphf->size = bmz->m;
DEBUGP("Successfully generated minimal perfect hash\n");
if (mph->verbosity)
{

View File

@ -5,7 +5,6 @@
#include "hash.h"
#include "vqueue.h"
#include "bitbool.h"
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
@ -66,7 +65,6 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c)
cmph_uint8 * visited = NULL;
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
if (mph->key_source->nkeys >= 256)
{
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
@ -168,9 +166,11 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c)
iterations_map--;
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
}
free(used_edges);
free(visited);
}while(restart_mapping && iterations_map > 0);
}while(restart_mapping && iterations_map > 0);
graph_destroy(bmz8->graph);
bmz8->graph = NULL;
if (iterations_map == 0)
@ -266,8 +266,8 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint8 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
cmph_uint8 next_g;
cmph_uint32 u; /* Auxiliary vertex */
cmph_uint32 lav; /* lookahead vertex */
cmph_uint32 u;
cmph_uint32 lav;
cmph_uint8 collision;
cmph_uint8 * unused_g_values = NULL;
cmph_uint8 unused_g_values_capacity = 0;
@ -278,7 +278,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
DEBUGP("Labelling critical vertices\n");
bmz8->g[v] = (cmph_uint8)ceil ((double)(*biggest_edge_value)/2) - 1;
SETBIT(visited, v);
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2)); /* next_g is incremented in the do..while statement*/
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2));
vqueue_insert(q, v);
while(!vqueue_is_empty(q))
{
@ -324,14 +324,15 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
{
if(nunused_g_values == unused_g_values_capacity)
{
unused_g_values = realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint8));
unused_g_values = realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint8));
unused_g_values_capacity += BUFSIZ;
}
unused_g_values[nunused_g_values++] = next_g;
}
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
}
}
next_g_index--;
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
@ -345,9 +346,11 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
if(next_g + bmz8->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz8->g[lav];
}
}
bmz8->g[u] = next_g; // Labelling vertex u.
SETBIT(visited, u);
vqueue_insert(q, u);
vqueue_insert(q, u);
}
}
@ -537,7 +540,7 @@ cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bmz8_destroy(cmph_t *mphf)
{
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
free(data->g);
free(data->g);
hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]);
free(data->hashes);

364
src/brz.c
View File

@ -4,10 +4,10 @@
#include "brz.h"
#include "cmph_structs.h"
#include "brz_structs.h"
#include "buffer_manage.h"
#include "cmph.h"
#include "hash.h"
#include "bitbool.h"
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
@ -21,12 +21,14 @@ static int brz_gen_graphs(cmph_config_t *mph);
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static char * brz_read_key(FILE * fd);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source);
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
//static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index);
static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE * fd);
brz_config_data_t *brz_config_new()
{
brz_config_data_t *brz = NULL;
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
brz->b = 128;
brz->hashfuncs[0] = CMPH_HASH_JENKINS;
brz->hashfuncs[1] = CMPH_HASH_JENKINS;
brz->hashfuncs[2] = CMPH_HASH_JENKINS;
@ -35,10 +37,11 @@ brz_config_data_t *brz_config_new()
brz->g = NULL;
brz->h1 = NULL;
brz->h2 = NULL;
brz->h3 = NULL;
brz->h0 = NULL;
brz->memory_availability = 1024*1024;
brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8));
strcpy(brz->tmp_dir, "/var/tmp/\0");
brz->mphf_fd = NULL;
strcpy((char *)(brz->tmp_dir), "/var/tmp/");
assert(brz);
return brz;
}
@ -46,6 +49,7 @@ brz_config_data_t *brz_config_new()
/*
 * Release the BRZ-specific configuration stored in mph->data: the
 * temporary-directory string is freed first, then the record itself.
 * mph->data is left pointing at the freed record, so it must not be
 * dereferenced after this call.
 */
void brz_config_destroy(cmph_config_t *mph)
{
    brz_config_data_t *brz = (brz_config_data_t *)mph->data;

    free(brz->tmp_dir);
    DEBUGP("Destroying algorithm dependent data\n");
    free(brz);
}
@ -74,22 +78,35 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
if(tmp_dir)
{
cmph_uint32 len = strlen(tmp_dir);
cmph_uint32 len = strlen((char *)tmp_dir);
free(brz->tmp_dir);
if(tmp_dir[len-1] != '/')
{
brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8));
sprintf(brz->tmp_dir, "%s/", tmp_dir);
sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir);
}
else
{
brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8));
sprintf(brz->tmp_dir, "%s", tmp_dir);
sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir);
}
}
}
/*
 * Store the stream that the BRZ construction writes the generated
 * function to (brz_gen_graphs issues its fwrite calls on it).
 * The stream must be non-NULL; this is checked by assert after the store.
 */
void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{
    brz_config_data_t *config = (brz_config_data_t *)mph->data;

    config->mphf_fd = mphf_fd;
    assert(config->mphf_fd != NULL);
}
/*
 * Set the BRZ parameter b.
 *
 * brz_new computes the number of buckets as m / b, so a value of zero
 * would make that an integer division by zero. A zero argument is
 * therefore ignored and the previously configured value is kept.
 */
void brz_config_set_b(cmph_config_t *mph, cmph_uint8 b)
{
    brz_config_data_t *brz = (brz_config_data_t *)mph->data;

    if (b == 0)
    {
        /* Rejecting 0 keeps the divisor used by brz_new valid. */
        return;
    }
    brz->b = b;
}
cmph_t *brz_new(cmph_config_t *mph, float c)
{
cmph_t *mphf = NULL;
@ -102,7 +119,7 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
brz->c = c;
brz->m = mph->key_source->nkeys;
DEBUGP("m: %u\n", brz->m);
brz->k = ceil(brz->m/170);
brz->k = ceil(brz->m/(brz->b));
DEBUGP("k: %u\n", brz->k);
brz->size = (cmph_uint8 *) calloc(brz->k, sizeof(cmph_uint8));
@ -112,22 +129,22 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
fprintf(stderr, "Partioning the set of keys.\n");
}
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->g = (cmph_uint8 **) malloc(sizeof(cmph_uint8 *) *brz->k);
// brz->h1 = (hash_state_t **)calloc(brz->k, sizeof(hash_state_t *));
// brz->h2 = (hash_state_t **)calloc(brz->k, sizeof(hash_state_t *));
// brz->g = (cmph_uint8 **) calloc(brz->k, sizeof(cmph_uint8 *));
while(1)
{
int ok;
DEBUGP("hash function 3\n");
brz->h3 = hash_state_new(brz->hashfuncs[2], brz->k);
brz->h0 = hash_state_new(brz->hashfuncs[2], brz->k);
DEBUGP("Generating graphs\n");
ok = brz_gen_graphs(mph);
if (!ok)
{
--iterations;
hash_state_destroy(brz->h3);
brz->h3 = NULL;
hash_state_destroy(brz->h0);
brz->h0 = NULL;
DEBUGP("%u iterations remaining to create the graphs in a external file\n", iterations);
if (mph->verbosity)
{
@ -150,7 +167,6 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
{
brz->offset[i] = brz->size[i-1] + brz->offset[i-1];
}
// Generating a mphf
mphf = (cmph_t *)malloc(sizeof(cmph_t));
mphf->algo = mph->algo;
@ -161,14 +177,12 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
brz->h1 = NULL; //transfer memory ownership
brzf->h2 = brz->h2;
brz->h2 = NULL; //transfer memory ownership
brzf->h3 = brz->h3;
brz->h3 = NULL; //transfer memory ownership
brzf->h0 = brz->h0;
brz->h0 = NULL; //transfer memory ownership
brzf->size = brz->size;
brz->size = NULL; //transfer memory ownership
brzf->offset = brz->offset;
brz->offset = NULL; //transfer memory ownership
brzf->tmp_dir = brz->tmp_dir;
brz->tmp_dir = NULL; //transfer memory ownership
brzf->k = brz->k;
brzf->c = brz->c;
brzf->m = brz->m;
@ -186,47 +200,42 @@ static int brz_gen_graphs(cmph_config_t *mph)
{
cmph_uint32 i, e;
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
//cmph_uint32 memory_availability = 200*1024*1024;
cmph_uint32 memory_usage = 0;
cmph_uint32 nkeys_in_buffer = 0;
cmph_uint8 *buffer = (cmph_uint8 *)malloc(brz->memory_availability);
cmph_uint32 *buckets_size = (cmph_uint32 *)calloc(brz->k, sizeof(cmph_uint32));
cmph_uint32 *buckets_size = (cmph_uint32 *)calloc(brz->k, sizeof(cmph_uint32));
cmph_uint32 *keys_index = NULL;
cmph_uint8 **buffer_merge = NULL;
cmph_uint32 *buffer_h3 = NULL;
cmph_uint32 *buffer_h0 = NULL;
cmph_uint32 nflushes = 0;
cmph_uint32 h3;
cmph_uint32 h0;
FILE * tmp_fd = NULL;
FILE ** tmp_fds = NULL;
buffer_manage_t * buff_manage = NULL;
char *filename = NULL;
char *key = NULL;
cmph_uint32 keylen;
cmph_uint32 max_size = 0;
cmph_uint32 cur_bucket = 0;
cmph_uint8 nkeys_vd = 0;
cmph_uint32 start_index = 0;
char ** keys_vd = NULL;
mph->key_source->rewind(mph->key_source->data);
DEBUGP("Generating graphs from %u keys\n", brz->m);
// Partitioning
for (e = 0; e < brz->m; ++e)
{
mph->key_source->read(mph->key_source->data, &key, &keylen);
/* Buffers management */
if (memory_usage + keylen + 1 > brz->memory_availability) // flush buffers
{
{
if(mph->verbosity)
{
fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
}
cmph_uint32 value = buckets_size[0];
cmph_uint32 sum = 0;
cmph_uint32 keylen1 = 0;
buckets_size[0] = 0;
cmph_uint32 keylen1 = 0;
buckets_size[0] = 0;
for(i = 1; i < brz->k; i++)
{
if(buckets_size[i] == 0) continue;
@ -239,20 +248,20 @@ static int brz_gen_graphs(cmph_config_t *mph)
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen(buffer + memory_usage);
h3 = hash(brz->h3, buffer + memory_usage, keylen1) % brz->k;
keys_index[buckets_size[h3]] = memory_usage;
buckets_size[h3]++;
keylen1 = strlen((char *)(buffer + memory_usage));
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
keys_index[buckets_size[h0]] = memory_usage;
buckets_size[h0]++;
memory_usage = memory_usage + keylen1 + 1;
}
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb");
free(filename);
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen(buffer + keys_index[i]) + 1;
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
}
nkeys_in_buffer = 0;
@ -264,17 +273,16 @@ static int brz_gen_graphs(cmph_config_t *mph)
}
memcpy(buffer + memory_usage, key, keylen + 1);
memory_usage = memory_usage + keylen + 1;
h3 = hash(brz->h3, key, keylen) % brz->k;
if ((brz->size[h3] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h3]) < brz->size[h3]))
h0 = hash(brz->h0, key, keylen) % brz->k;
if ((brz->size[h0] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))
{
free(buffer);
free(buckets_size);
return 0;
}
brz->size[h3] = brz->size[h3] + 1;
buckets_size[h3] ++;
brz->size[h0] = brz->size[h0] + 1;
buckets_size[h0] ++;
nkeys_in_buffer++;
mph->key_source->dispose(mph->key_source->data, key, keylen);
}
@ -299,20 +307,20 @@ static int brz_gen_graphs(cmph_config_t *mph)
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen(buffer + memory_usage);
h3 = hash(brz->h3, buffer + memory_usage, keylen1) % brz->k;
keys_index[buckets_size[h3]] = memory_usage;
buckets_size[h3]++;
keylen1 = strlen((char *)(buffer + memory_usage));
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
keys_index[buckets_size[h0]] = memory_usage;
buckets_size[h0]++;
memory_usage = memory_usage + keylen1 + 1;
}
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb");
free(filename);
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen(buffer + keys_index[i]) + 1;
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
}
nkeys_in_buffer = 0;
@ -322,66 +330,70 @@ static int brz_gen_graphs(cmph_config_t *mph)
free(keys_index);
fclose(tmp_fd);
}
free(buffer);
free(buckets_size);
if(nflushes > 1024) return 0; // Too many files generated.
// mphf generation
if(mph->verbosity)
{
fprintf(stderr, "\nMPHF generation \n");
}
tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
/* Starting to dump to disk the resultant MPHF: __cmph_dump function */
fwrite(cmph_names[CMPH_BRZ], (cmph_uint32)(strlen(cmph_names[CMPH_BRZ]) + 1), 1, brz->mphf_fd);
fwrite(&(brz->m), sizeof(brz->m), 1, brz->mphf_fd);
fwrite(&(brz->c), sizeof(cmph_float32), 1, brz->mphf_fd);
fwrite(&(brz->k), sizeof(cmph_uint32), 1, brz->mphf_fd); // number of MPHFs
fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), 1, brz->mphf_fd);
//tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
buff_manage = buffer_manage_new(brz->memory_availability, nflushes);
buffer_merge = (cmph_uint8 **)calloc(nflushes, sizeof(cmph_uint8 *));
buffer_h3 = (cmph_uint32 *)calloc(nflushes, sizeof(cmph_uint32));
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%stmpg.cmph",brz->tmp_dir);
tmp_fd = fopen(filename, "w");
free(filename);
buffer_h0 = (cmph_uint32 *)calloc(nflushes, sizeof(cmph_uint32));
memory_usage = 0;
for(i = 0; i < nflushes; i++)
{
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
tmp_fds[i] = fopen(filename, "rb");
buffer_manage_open(buff_manage, i, filename);
free(filename);
filename = NULL;
key = brz_read_key(tmp_fds[i]);
key = (char *)buffer_manage_read_key(buff_manage, i);
keylen = strlen(key);
h3 = hash(brz->h3, key, keylen) % brz->k;
buffer_h3[i] = h3;
h0 = hash(brz->h0, key, keylen) % brz->k;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1);
free(key);
}
e = 0;
keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *));
nkeys_vd = 0;
while(e < brz->m)
{
i = brz_min_index(buffer_h3, nflushes);
cur_bucket = buffer_h3[i];
key = brz_read_key(tmp_fds[i]);
i = brz_min_index(buffer_h0, nflushes);
cur_bucket = buffer_h0[i];
key = (char *)buffer_manage_read_key(buff_manage, i);
if(key)
{
while(key)
{
keylen = strlen(key);
h3 = hash(brz->h3, key, keylen) % brz->k;
if (h3 != buffer_h3[i]) break;
h0 = hash(brz->h0, key, keylen) % brz->k;
if (h0 != buffer_h0[i]) break;
keys_vd[nkeys_vd++] = key;
key = NULL; //transfer memory ownership
e++;
key = brz_read_key(tmp_fds[i]);
key = (char *)buffer_manage_read_key(buff_manage, i);
}
if (key)
{
assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = buffer_merge[i];
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
buffer_merge[i] = NULL; //transfer memory ownership
e++;
buffer_h3[i] = h3;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1);
free(key);
@ -390,10 +402,10 @@ static int brz_gen_graphs(cmph_config_t *mph)
if(!key)
{
assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = buffer_merge[i];
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
buffer_merge[i] = NULL; //transfer memory ownership
e++;
buffer_h3[i] = UINT_MAX;
buffer_merge[i] = NULL;
buffer_h0[i] = UINT_MAX;
}
if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
@ -402,35 +414,33 @@ static int brz_gen_graphs(cmph_config_t *mph)
cmph_config_t *config = NULL;
cmph_t *mphf_tmp = NULL;
bmz8_data_t * bmzf = NULL;
char *bufmphf = NULL;
cmph_uint32 buflenmphf = 0;
// Source of keys
if(nkeys_vd > max_size) max_size = nkeys_vd;
source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
config = cmph_config_new(source);
cmph_config_set_algo(config, CMPH_BMZ8);
cmph_config_set_graphsize(config, brz->c);
mphf_tmp = cmph_new(config);
bmzf = (bmz8_data_t *)mphf_tmp->data;
brz_copy_partial_mphf(brz, bmzf, cur_bucket, source);
memory_usage += brz->size[cur_bucket];
if((cur_bucket+1 == brz->k)||(memory_usage > brz->memory_availability))
{
brz_flush_g(brz, &start_index, tmp_fd);
memory_usage = 0;
}
bufmphf = brz_copy_partial_mphf(brz, bmzf, cur_bucket, &buflenmphf);
bmzf = NULL;
fwrite(bufmphf, buflenmphf, 1, brz->mphf_fd);
free(bufmphf);
bufmphf = NULL;
cmph_config_destroy(config);
brz_destroy_keys_vd(keys_vd, nkeys_vd);
brz_destroy_keys_vd(keys_vd, nkeys_vd);
cmph_destroy(mphf_tmp);
free(source);
cmph_io_vector_adapter_destroy(source);
nkeys_vd = 0;
}
}
fclose(tmp_fd);
for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]);
free(tmp_fds);
buffer_manage_destroy(buff_manage);
free(keys_vd);
free(buffer_merge);
free(buffer_h3);
fprintf(stderr, "Maximal Size: %u\n", max_size);
free(buffer_h0);
return 1;
}
@ -467,7 +477,7 @@ static char * brz_read_key(FILE * fd)
/*
 * Free the first `nkeys` strings stored in `keys_vd` and reset each slot
 * to NULL so the array can be reused. The array itself is not freed.
 *
 * Each key is released exactly once: a second pass over the same slots
 * without the NULL reset would free every pointer twice.
 */
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
{
    cmph_uint8 i;

    for (i = 0; i < nkeys; i++)
    {
        free(keys_vd[i]);
        keys_vd[i] = NULL;
    }
}
static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE * fd)
@ -481,93 +491,84 @@ static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE *
}
}
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source)
/*
 * Serialize the partial function of bucket `index` into one heap buffer.
 *
 * Layout of the returned buffer, in order:
 *   buflenh1 (cmph_uint32) | dumped state of bmzf->hashes[0] (buflenh1 bytes)
 *   buflenh2 (cmph_uint32) | dumped state of bmzf->hashes[1] (buflenh2 bytes)
 *   the first n bytes of bmzf->g, where n = ceil(brz->c * brz->size[index])
 *
 * On success *buflen receives the total size and the caller must free()
 * the returned buffer. If the buffer cannot be allocated, the temporary
 * hash dumps are released, *buflen is set to 0 and NULL is returned.
 *
 * NOTE(review): the outputs of hash_state_dump are not validated here;
 * confirm how it reports failure before relying on bufh1/bufh2.
 */
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
{
    cmph_uint32 buflenh1 = 0;
    cmph_uint32 buflenh2 = 0;
    char * bufh1 = NULL;
    char * bufh2 = NULL;
    char * buf = NULL;
    char * cursor = NULL;
    cmph_uint32 n = ceil(brz->c * brz->size[index]);

    hash_state_dump(bmzf->hashes[0], &bufh1, &buflenh1);
    hash_state_dump(bmzf->hashes[1], &bufh2, &buflenh2);

    *buflen = buflenh1 + buflenh2 + n + 2*sizeof(cmph_uint32);
    buf = (char *)malloc(*buflen);
    if (buf == NULL)
    {
        free(bufh1);
        free(bufh2);
        *buflen = 0;
        return NULL;
    }

    /* Append each field at the cursor and advance past it. */
    cursor = buf;
    memcpy(cursor, &buflenh1, sizeof(cmph_uint32));
    cursor += sizeof(cmph_uint32);
    memcpy(cursor, bufh1, buflenh1);
    cursor += buflenh1;
    memcpy(cursor, &buflenh2, sizeof(cmph_uint32));
    cursor += sizeof(cmph_uint32);
    memcpy(cursor, bufh2, buflenh2);
    cursor += buflenh2;
    memcpy(cursor, bmzf->g, n);

    free(bufh1);
    free(bufh2);
    return buf;
}
/*static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index)
{
cmph_uint32 i;
cmph_uint32 n = ceil(brz->c * brz->size[index]);
if( brz->g[index]) {fprintf(stderr, "index:%u\n",index);exit(10);}
brz->g[index] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8));
for(i = 0; i < n; i++)
{
brz->g[index][i] = bmzf->g[i];
//fprintf(stderr, "gsrc[%u]: %u gdest: %u\n", i, (cmph_uint8) bmzf->g[i], brz->g[index][i]);
}
brz->h1[index] = hash_state_copy(bmzf->hashes[0]);
brz->h2[index] = hash_state_copy(bmzf->hashes[1]);
}
brz->h1[index] = hash_state_copy(bmzf->hashes[0]);
brz->h2[index] = hash_state_copy(bmzf->hashes[1]);
}
*/
int brz_dump(cmph_t *mphf, FILE *fd)
{
brz_data_t *data = (brz_data_t *)mphf->data;
char *buf = NULL;
cmph_uint32 buflen;
cmph_uint32 i;
brz_data_t *data = (brz_data_t *)mphf->data;
FILE * tmpg_fd = NULL;
char * filename = NULL;
filename = (char *)calloc(strlen(data->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%stmpg.cmph",data->tmp_dir);
tmpg_fd = fopen(filename, "rb");
free(filename);
DEBUGP("Dumping brzf\n");
__cmph_dump(mphf, fd);
fwrite(&(data->k), sizeof(cmph_uint32), 1, fd);
//dumping h1 and h2.
for(i = 0; i < data->k; i++)
{
// h1
hash_state_dump(data->h1[i], &buf, &buflen);
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
fwrite(buf, buflen, 1, fd);
free(buf);
// h2
hash_state_dump(data->h2[i], &buf, &buflen);
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
fwrite(buf, buflen, 1, fd);
free(buf);
}
// Dumping h3.
hash_state_dump(data->h3, &buf, &buflen);
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
fwrite(buf, buflen, 1, fd);
free(buf);
// Dumping c, m, size vector and offset vector.
fwrite(&(data->c), sizeof(cmph_float32), 1, fd);
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
fwrite(data->size, sizeof(cmph_uint8)*(data->k), 1, fd);
// The initial part of the MPHF have already been dumped to disk during construction
// Dumping h0
hash_state_dump(data->h0, &buf, &buflen);
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
fwrite(buf, buflen, 1, fd);
free(buf);
// Dumping m and the vector offset.
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
fwrite(data->offset, sizeof(cmph_uint32)*(data->k), 1, fd);
// Dumping g function.
for(i = 0; i < data->k; i++)
{
cmph_uint32 n = ceil(data->c * data->size[i]);
buf = (char *)calloc(n, sizeof(cmph_uint8));
fread(buf, sizeof(cmph_uint8), n, tmpg_fd);
fwrite(buf, sizeof(cmph_uint8), n, fd);
free(buf);
}
fclose(tmpg_fd);
return 1;
}
void brz_load(FILE *f, cmph_t *mphf)
{
char *buf = NULL;
cmph_uint32 buflen;
cmph_uint32 i;
cmph_uint32 i, n;
brz_data_t *brz = (brz_data_t *)malloc(sizeof(brz_data_t));
DEBUGP("Loading brz mphf\n");
mphf->data = brz;
fread(&(brz->c), sizeof(cmph_float32), 1, f);
fread(&(brz->k), sizeof(cmph_uint32), 1, f);
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
fread(brz->size, sizeof(cmph_uint8)*(brz->k), 1, f);
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->g = (cmph_uint8 **) calloc(brz->k, sizeof(cmph_uint8 *));
DEBUGP("Reading %u h1 and %u h2\n", brz->k, brz->k);
//loading h1 and h2.
//loading h_i1, h_i2 and g_i.
for(i = 0; i < brz->k; i++)
{
// h1
@ -583,69 +584,62 @@ void brz_load(FILE *f, cmph_t *mphf)
buf = (char *)malloc(buflen);
fread(buf, buflen, 1, f);
brz->h2[i] = hash_state_load(buf, buflen);
free(buf);
free(buf);
n = ceil(brz->c * brz->size[i]);
DEBUGP("g_i has %u bytes\n", n);
brz->g[i] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8));
fread(brz->g[i], sizeof(cmph_uint8)*n, 1, f);
}
//loading h3
//loading h0
fread(&buflen, sizeof(cmph_uint32), 1, f);
DEBUGP("Hash state has %u bytes\n", buflen);
buf = (char *)malloc(buflen);
fread(buf, buflen, 1, f);
brz->h3 = hash_state_load(buf, buflen);
free(buf);
brz->h0 = hash_state_load(buf, buflen);
free(buf);
//loading c, m, size vector and offset vector.
fread(&(brz->c), sizeof(cmph_float32), 1, f);
//loading c, m, and the vector offset.
fread(&(brz->m), sizeof(cmph_uint32), 1, f);
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k);
fread(brz->size, sizeof(cmph_uint8)*(brz->k), 1, f);
fread(brz->offset, sizeof(cmph_uint32)*(brz->k), 1, f);
//loading g function.
brz->g = (cmph_uint8 **) malloc(sizeof(cmph_uint8 *)*brz->k);
for(i = 0; i < brz->k; i++)
{
cmph_uint32 n = ceil(brz->c * brz->size[i]);
brz->g[i] = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*n);
fread(brz->g[i], sizeof(cmph_uint8)*n, 1, f);
}
fread(brz->offset, sizeof(cmph_uint32)*(brz->k), 1, f);
return;
}
/*
 * Look up `key` (of length `keylen`) in a BRZ function.
 *
 * h0 selects one of the brz->k buckets. Within that bucket, h1 and h2
 * index its g array, whose length is n = ceil(c * size[h0]). The two g
 * values are summed into a cmph_uint8 and added to the bucket's offset.
 *
 * An empty bucket (size[h0] == 0) gives n == 0; taking a hash modulo n
 * would then divide by zero, so the bucket's offset is returned instead.
 */
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
    brz_data_t *brz = mphf->data;
    cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
    cmph_uint32 m = brz->size[h0];
    cmph_uint32 n = ceil(brz->c * m);
    cmph_uint32 h1;
    cmph_uint32 h2;
    cmph_uint8 mphf_bucket;

    if (n == 0)
    {
        /* No key was placed in this bucket; avoid the modulo by zero. */
        return brz->offset[h0];
    }

    h1 = hash(brz->h1[h0], key, keylen) % n;
    h2 = hash(brz->h2[h0], key, keylen) % n;
    /* The two positions must differ; move h2 to the next slot, wrapping. */
    if (h1 == h2 && ++h2 >= n) h2 = 0;
    mphf_bucket = brz->g[h0][h1] + brz->g[h0][h2];
    DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
    DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m);
    DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
    return (mphf_bucket + brz->offset[h0]);
}
/*
 * Release a BRZ function and everything it owns.
 *
 * When the g table is present, each bucket's g array and its two hash
 * states are released, followed by the g, h1 and h2 arrays themselves.
 * The h0 state, the size and offset vectors, the data record and the
 * cmph_t wrapper are released unconditionally.
 *
 * Every pointer is freed exactly once; the per-bucket teardown runs in a
 * single loop guarded by the presence of data->g.
 */
void brz_destroy(cmph_t *mphf)
{
    cmph_uint32 i;
    brz_data_t *data = (brz_data_t *)mphf->data;

    if (data->g)
    {
        for (i = 0; i < data->k; i++)
        {
            free(data->g[i]);
            hash_state_destroy(data->h1[i]);
            hash_state_destroy(data->h2[i]);
        }
        free(data->g);
        free(data->h1);
        free(data->h2);
    }
    hash_state_destroy(data->h0);
    free(data->size);
    free(data->offset);
    free(data);
    free(mphf);
}

View File

@ -9,6 +9,8 @@ typedef struct __brz_config_data_t brz_config_data_t;
brz_config_data_t *brz_config_new();
void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
void brz_config_set_b(cmph_config_t *mph, cmph_uint8 b);
void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
void brz_config_destroy(cmph_config_t *mph);
cmph_t *brz_new(cmph_config_t *mph, float c);

View File

@ -13,8 +13,7 @@ struct __brz_data_t
cmph_uint32 k; // number of components
hash_state_t **h1;
hash_state_t **h2;
hash_state_t * h3;
cmph_uint8 * tmp_dir; // temporary directory
hash_state_t * h0;
};
struct __brz_config_data_t
@ -25,12 +24,14 @@ struct __brz_config_data_t
cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...].
cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1].
cmph_uint8 **g; // g function.
cmph_uint8 b; // parameter b.
cmph_uint32 k; // number of components
hash_state_t **h1;
hash_state_t **h2;
hash_state_t * h3;
hash_state_t * h0;
cmph_uint32 memory_availability;
cmph_uint8 * tmp_dir; // temporary directory
FILE * mphf_fd; // mphf file
};
#endif

View File

@ -49,7 +49,8 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
/*
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
char **keys_vd = (char **)cmph_vector->vector;
if (keys_vd + cmph_vector->position == NULL) return -1;
@ -57,7 +58,17 @@ static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
*key = (char *)malloc(*keylen + 1);
strcpy(*key, *(keys_vd + cmph_vector->position));
cmph_vector->position = cmph_vector->position + 1;
*/
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
char **keys_vd = (char **)cmph_vector->vector;
// if (keys_vd + cmph_vector->position == NULL) return -1;
*keylen = strlen(keys_vd[cmph_vector->position]);
*key = (char *)malloc(*keylen + 1);
strcpy(*key, keys_vd[cmph_vector->position]);
cmph_vector->position = cmph_vector->position + 1;
return *keylen;
}
@ -68,7 +79,7 @@ static void key_nlfile_dispose(void *data, char *key, cmph_uint32 keylen)
static void key_vector_dispose(void *data, char *key, cmph_uint32 keylen)
{
key_nlfile_dispose(data, key, keylen);
free(key);
}
static void key_nlfile_rewind(void *data)
@ -236,7 +247,43 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
default:
assert(0);
}
}
/*
 * Hand the output stream to the algorithm-specific configuration.
 * Only BRZ consumes it; for CHM, BMZ and BMZ8 the call is accepted and
 * does nothing. Any other algorithm value trips the assert.
 */
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{
    switch (mph->algo)
    {
        case CMPH_BRZ:
            brz_config_set_mphf_fd(mph, mphf_fd);
            break;
        case CMPH_CHM:
        case CMPH_BMZ:
        case CMPH_BMZ8:
            /* These algorithms have no use for the stream. */
            break;
        default:
            assert(0);
    }
}
/*
 * Hand the parameter b to the algorithm-specific configuration.
 * Only BRZ consumes it; for CHM, BMZ and BMZ8 the call is accepted and
 * does nothing. Any other algorithm value trips the assert.
 */
void cmph_config_set_b(cmph_config_t *mph, cmph_uint8 b)
{
    switch (mph->algo)
    {
        case CMPH_BRZ:
            brz_config_set_b(mph, b);
            break;
        case CMPH_CHM:
        case CMPH_BMZ:
        case CMPH_BMZ8:
            /* These algorithms have no b parameter. */
            break;
        default:
            assert(0);
    }
}
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)

View File

@ -41,6 +41,8 @@ void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity);
void cmph_config_set_graphsize(cmph_config_t *mph, float c);
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
void cmph_config_set_b(cmph_config_t *mph, cmph_uint8 b);
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
void cmph_config_destroy(cmph_config_t *mph);

View File

@ -89,9 +89,6 @@ jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
DEBUGP("Initializing jenkins hash\n");
state->seed = rand() % size;
state->nbits = (cmph_uint32)ceil(log(size)/M_LOG2E);
state->size = size;
DEBUGP("Initialized jenkins with size %u, nbits %u and seed %u\n", size, state->nbits, state->seed);
return state;
}
void jenkins_state_destroy(jenkins_state_t *state)
@ -164,7 +161,7 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
{
*buflen = sizeof(cmph_uint32)*3;
*buflen = sizeof(cmph_uint32);
*buf = malloc(*buflen);
if (!*buf)
{
@ -172,10 +169,7 @@ void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
return;
}
memcpy(*buf, &(state->seed), sizeof(cmph_uint32));
memcpy(*buf + sizeof(cmph_uint32), &(state->nbits), sizeof(cmph_uint32));
memcpy(*buf + sizeof(cmph_uint32)*2, &(state->size), sizeof(cmph_uint32));
DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
return;
}
@ -184,8 +178,6 @@ jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state)
jenkins_state_t *dest_state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
dest_state->hashfunc = src_state->hashfunc;
dest_state->seed = src_state->seed;
dest_state->nbits = src_state->nbits;
dest_state->size = src_state->size;
return dest_state;
}
@ -193,8 +185,6 @@ jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
{
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
state->seed = *(cmph_uint32 *)buf;
state->nbits = *(((cmph_uint32 *)buf) + 1);
state->size = *(((cmph_uint32 *)buf) + 2);
state->hashfunc = CMPH_HASH_JENKINS;
DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
return state;

View File

@ -7,8 +7,6 @@ typedef struct __jenkins_state_t
{
CMPH_HASH hashfunc;
cmph_uint32 seed;
cmph_uint32 nbits;
cmph_uint32 size;
} jenkins_state_t;
jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table

View File

@ -22,12 +22,12 @@
void usage(const char *prg)
{
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b BRZ_parameter] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
}
void usage_long(const char *prg)
{
cmph_uint32 i;
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b BRZ_parameter] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, " -h\t print this help message\n");
fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n");
@ -43,13 +43,13 @@ void usage_long(const char *prg)
fprintf(stderr, " -m\t minimum perfect hash function file \n");
fprintf(stderr, " -M\t main memory availability (in MB)\n");
fprintf(stderr, " -d\t temporary directory used in brz algorithm \n");
fprintf(stderr, " -b\t parmeter of BRZ algorithm to make the maximal number of keys in a bucket lower than 256\n");
fprintf(stderr, " keysfile\t line separated file with keys\n");
}
int main(int argc, char **argv)
{
char verbosity = 0;
char verbosity = 0;
char generate = 0;
char *mphf_file = NULL;
FILE *mphf_fd = stdout;
@ -67,9 +67,10 @@ int main(int argc, char **argv)
cmph_uint8 * tmp_dir = NULL;
cmph_io_adapter_t *source;
cmph_uint32 memory_availability = 0;
cmph_uint32 b = 128;
while (1)
{
char ch = getopt(argc, argv, "hVvgc:k:a:M:f:m:d:s:");
char ch = getopt(argc, argv, "hVvgc:k:a:M:b:f:m:d:s:");
if (ch == -1) break;
switch (ch)
{
@ -122,6 +123,16 @@ int main(int argc, char **argv)
}
}
break;
case 'b':
{
char *cptr;
b = strtoul(optarg, &cptr, 10);
if(*cptr != 0) {
fprintf(stderr, "Parameter b was not found: %s\n", optarg);
exit(1);
}
}
break;
case 'v':
++verbosity;
break;
@ -184,9 +195,9 @@ int main(int argc, char **argv)
return 1;
}
keys_file = argv[optind];
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
srand(seed);
int ret = 0;
if (mphf_file == NULL)
{
@ -196,6 +207,7 @@ int main(int argc, char **argv)
}
keys_fd = fopen(keys_file, "r");
if (keys_fd == NULL)
{
fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
@ -209,33 +221,35 @@ int main(int argc, char **argv)
if (generate)
{
//Create mphf
mphf_fd = fopen(mphf_file, "w");
config = cmph_config_new(source);
cmph_config_set_algo(config, mph_algo);
if (nhashes) cmph_config_set_hashfuncs(config, hashes);
cmph_config_set_verbosity(config, verbosity);
cmph_config_set_tmp_dir(config, tmp_dir);
cmph_config_set_mphf_fd(config, mphf_fd);
cmph_config_set_memory_availability(config, memory_availability);
cmph_config_set_b(config, b);
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c);
mphf = cmph_new(config);
cmph_config_destroy(config);
if (mphf == NULL)
{
fprintf(stderr, "Unable to create minimum perfect hashing function\n");
cmph_config_destroy(config);
//cmph_config_destroy(config);
free(mphf_file);
return -1;
}
mphf_fd = fopen(mphf_file, "w");
if (mphf_fd == NULL)
{
fprintf(stderr, "Unable to open output file %s: %s\n", mphf_file, strerror(errno));
free(mphf_file);
return -1;
}
cmph_dump(mphf, mphf_fd);
cmph_destroy(mphf);
cmph_dump(mphf, mphf_fd);
cmph_destroy(mphf);
fclose(mphf_fd);
}
else
@ -289,6 +303,7 @@ int main(int argc, char **argv)
fclose(keys_fd);
free(mphf_file);
free(tmp_dir);
free(source);
cmph_io_nlfile_adapter_destroy(source);
return ret;
}

View File

@ -46,5 +46,5 @@ void vqueue_print(vqueue_t * q)
void vqueue_destroy(vqueue_t *q)
{
free(q->values); q->values = NULL;
free(q->values); q->values = NULL; free(q);
}

View File

@ -8,96 +8,79 @@ Single
-2
1200 2
0 32 #bebebe
6 3285 3600 3555 4230
6 3285 3780 3555 4230
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 4140 3555 4140 3555 4230 3285 4230 3285 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 4050 3555 4050 3555 4140 3285 4140 3285 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3960 3555 3960 3555 4050 3285 4050 3285 3960
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3870 3555 3870 3555 3960 3285 3960 3285 3870
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3780 3555 3780 3555 3870 3285 3870 3285 3780
-6
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3690 3555 3690 3555 3780 3285 3780 3285 3690
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3600 3555 3600 3555 3690 3285 3690 3285 3600
-6
6 1800 4500 3330 5175
6 2025 3015 3555 3690
2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 8
1800 4770 2070 4770 2070 4500 3060 4500 3060 4770 3330 4770
2565 5175 1800 4770
4 0 0 50 -1 0 10 0.0000 4 150 600 2265 4867 Spreading\001
2025 3285 2295 3285 2295 3015 3285 3015 3285 3285 3555 3285
2790 3690 2025 3285
4 0 0 50 -1 0 10 0.0000 4 135 765 2385 3330 Partitioning\001
-6
6 2250 3060 2880 3600
6 2250 3060 2880 3600
6 2250 3060 2880 3600
6 2250 3060 2880 3600
2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 8
2250 3330 2430 3330 2430 3060 2700 3060 2700 3330 2880 3330
2565 3600 2250 3330
6 1890 3735 3780 4365
6 2430 3735 2700 4365
6 2430 3915 2700 4365
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 4275 2700 4275 2700 4365 2430 4365 2430 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 4185 2700 4185 2700 4275 2430 4275 2430 4185
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 4095 2700 4095 2700 4185 2430 4185 2430 4095
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 4005 2700 4005 2700 4095 2430 4095 2430 4005
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 3915 2700 3915 2700 4005 2430 4005 2430 3915
-6
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 3825 2700 3825 2700 3915 2430 3915 2430 3825
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2430 3735 2700 3735 2700 3825 2430 3825 2430 3735
-6
4 0 0 50 -1 0 10 0.0000 4 105 75 2521 3382 h\001
-6
4 0 0 50 -1 0 6 0.0000 4 60 45 2589 3419 1\001
-6
6 1395 2655 3825 2970
2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5
3825 2970 3825 2655 1395 2655 1395 2970 3825 2970
4 0 0 50 -1 0 10 0.0000 4 135 795 2212 2850 Set of Keys S\001
-6
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1890 4275 2160 4275 2160 4365 1890 4365 1890 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1890 4185 2160 4185 2160 4275 1890 4275 1890 4185
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2160 4275 2430 4275 2430 4365 2160 4365 2160 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2160 4185 2430 4185 2430 4275 2160 4275 2160 4185
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2160 4095 2430 4095 2430 4185 2160 4185 2160 4095
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2160 4005 2430 4005 2430 4095 2160 4095 2160 4005
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2160 3915 2430 3915 2430 4005 2160 4005 2160 3915
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2700 4275 2970 4275 2970 4365 2700 4365 2700 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2700 4185 2970 4185 2970 4275 2700 4275 2700 4185
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2700 4095 2970 4095 2970 4185 2700 4185 2700 4095
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2700 4005 2970 4005 2970 4095 2700 4095 2700 4005
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2160 3825 2430 3825 2430 3915 2160 3915 2160 3825
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3240 4275 3510 4275 3510 4365 3240 4365 3240 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3510 4275 3780 4275 3780 4365 3510 4365 3510 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2970 4275 3240 4275 3240 4365 2970 4365 2970 4275
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3240 4185 3510 4185 3510 4275 3240 4275 3240 4185
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1890 4095 2160 4095 2160 4185 1890 4185 1890 4095
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3510 4185 3780 4185 3780 4275 3510 4275 3510 4185
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3240 4095 3510 4095 3510 4185 3240 4185 3240 4095
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3240 4005 3510 4005 3510 4095 3240 4095 3240 4005
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3240 3915 3510 3915 3510 4005 3240 4005 3240 3915
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
1395 4230 3825 4230
1890 4365 3780 4365
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1395 4140 1665 4140 1665 4230 1395 4230 1395 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1395 4050 1665 4050 1665 4140 1395 4140 1395 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1665 4140 1935 4140 1935 4230 1665 4230 1665 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1665 4050 1935 4050 1935 4140 1665 4140 1665 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1665 3960 1935 3960 1935 4050 1665 4050 1665 3960
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1665 3870 1935 3870 1935 3960 1665 3960 1665 3870
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1665 3780 1935 3780 1935 3870 1665 3870 1665 3780
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2205 4140 2475 4140 2475 4230 2205 4230 2205 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2205 4050 2475 4050 2475 4140 2205 4140 2205 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2205 3960 2475 3960 2475 4050 2205 4050 2205 3960
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2205 3870 2475 3870 2475 3960 2205 3960 2205 3870
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1665 3690 1935 3690 1935 3780 1665 3780 1665 3690
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2745 4140 3015 4140 3015 4230 2745 4230 2745 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3015 4140 3285 4140 3285 4230 3015 4230 3015 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2475 4140 2745 4140 2745 4230 2475 4230 2475 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2745 4050 3015 4050 3015 4140 2745 4140 2745 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1395 3960 1665 3960 1665 4050 1395 4050 1395 3960
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3555 4140 3825 4140 3825 4230 3555 4230 3555 4140
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3555 4050 3825 4050 3825 4140 3555 4140 3555 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3015 4050 3285 4050 3285 4140 3015 4140 3015 4050
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2745 3960 3015 3960 3015 4050 2745 4050 2745 3960
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2745 3870 3015 3870 3015 3960 2745 3960 2745 3870
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2745 3780 3015 3780 3015 3870 2745 3870 2745 3780
2970 4185 3240 4185 3240 4275 2970 4275 2970 4185
-6
6 1260 5310 4230 5580
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
1260 5400 4230 5400
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
@ -122,14 +105,49 @@ Single
3150 5310 3420 5310 3420 5400 3150 5400 3150 5310
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1260 5310 1530 5310 1530 5400 1260 5400 1260 5310
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3510 3555 3510 3555 3600 3285 3600 3285 3510
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3285 3420 3555 3420 3555 3510 3285 3510 3285 3420
4 0 0 50 -1 0 10 0.0000 4 105 75 1485 4410 0\001
4 0 0 50 -1 0 10 0.0000 4 105 210 3600 4410 b-1\001
4 0 0 50 -1 0 10 0.0000 4 105 480 720 4050 Buckets\001
4 0 0 50 -1 0 10 0.0000 4 105 90 900 4230 B\001
4 0 0 50 -1 0 10 0.0000 4 105 210 4005 5580 n-1\001
4 0 0 50 -1 0 10 0.0000 4 105 75 1350 5580 0\001
4 0 0 50 -1 0 10 0.0000 4 105 690 450 5400 Hash Table\001
-6
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
1260 2925 4230 2925
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1530 2835 1800 2835 1800 2925 1530 2925 1530 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2070 2835 2340 2835 2340 2925 2070 2925 2070 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2340 2835 2610 2835 2610 2925 2340 2925 2340 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2610 2835 2880 2835 2880 2925 2610 2925 2610 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
2880 2835 3150 2835 3150 2925 2880 2925 2880 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3420 2835 3690 2835 3690 2925 3420 2925 3420 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3690 2835 3960 2835 3960 2925 3690 2925 3690 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3960 2835 4230 2835 4230 2925 3960 2925 3960 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1800 2835 2070 2835 2070 2925 1800 2925 1800 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
3150 2835 3420 2835 3420 2925 3150 2925 3150 2835
2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
1260 2835 1530 2835 1530 2925 1260 2925 1260 2835
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
3510 4410 3510 4590
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
3510 4410 3600 4410
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
3690 4410 3780 4410
2 3 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 8
2025 4815 2295 4815 2295 4545 3285 4545 3285 4815 3555 4815
2790 5220 2025 4815
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
3780 4410 3780 4590
4 0 0 50 -1 0 10 0.0000 4 135 585 2475 4860 Searching\001
4 0 0 50 -1 0 10 0.0000 4 105 75 1980 4545 0\001
4 0 0 50 -1 0 10 0.0000 4 105 690 4410 5400 Hash Table\001
4 0 0 50 -1 0 10 0.0000 4 105 480 4410 4230 Buckets\001
4 0 0 50 -1 0 10 0.0000 4 135 555 4410 2925 Key set S\001
4 0 0 50 -1 0 10 0.0000 4 105 75 1350 2745 0\001
4 0 0 50 -1 0 10 0.0000 4 105 210 4005 2745 n-1\001
4 0 0 50 -1 0 10 0.0000 4 105 420 3555 4545 n/b - 1\001