Aesthetics in C code and replaced some asserts with NULL returns.

This commit is contained in:
Davi de Castro Reis 2011-12-26 19:35:30 -02:00
parent 4e4d36d833
commit 24e645febe
27 changed files with 649 additions and 656 deletions

View File

@ -35,9 +35,9 @@ const cmph_uint8 bdz_lookup_table[] =
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
}; };
typedef struct typedef struct
{ {
cmph_uint32 vertices[3]; cmph_uint32 vertices[3];
cmph_uint32 next_edges[3]; cmph_uint32 next_edges[3];
@ -54,12 +54,12 @@ static void bdz_free_queue(bdz_queue_t * queue)
free(*queue); free(*queue);
}; };
typedef struct typedef struct
{ {
cmph_uint32 nedges; cmph_uint32 nedges;
bdz_edge_t * edges; bdz_edge_t * edges;
cmph_uint32 * first_edge; cmph_uint32 * first_edge;
cmph_uint8 * vert_degree; cmph_uint8 * vert_degree;
}bdz_graph3_t; }bdz_graph3_t;
@ -67,7 +67,7 @@ static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uin
{ {
graph3->edges=malloc(nedges*sizeof(bdz_edge_t)); graph3->edges=malloc(nedges*sizeof(bdz_edge_t));
graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32)); graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
graph3->vert_degree=malloc((size_t)nvertices); graph3->vert_degree=malloc((size_t)nvertices);
}; };
static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices) static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{ {
@ -136,7 +136,7 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
j=0; j=0;
} else if(graph3->edges[edge1].vertices[1]==vert){ } else if(graph3->edges[edge1].vertices[1]==vert){
j=1; j=1;
} else } else
j=2; j=2;
edge1=graph3->edges[edge1].next_edges[j]; edge1=graph3->edges[edge1].next_edges[j];
}; };
@ -145,16 +145,16 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4); bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
exit(-1); exit(-1);
}; };
if(edge2!=NULL_EDGE){ if(edge2!=NULL_EDGE){
graph3->edges[edge2].next_edges[j] = graph3->edges[edge2].next_edges[j] =
graph3->edges[edge1].next_edges[i]; graph3->edges[edge1].next_edges[i];
} else } else
graph3->first_edge[vert]= graph3->first_edge[vert]=
graph3->edges[edge1].next_edges[i]; graph3->edges[edge1].next_edges[i];
graph3->vert_degree[vert]--; graph3->vert_degree[vert]--;
}; };
}; };
static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3) static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
@ -170,7 +170,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
v0=graph3->edges[i].vertices[0]; v0=graph3->edges[i].vertices[0];
v1=graph3->edges[i].vertices[1]; v1=graph3->edges[i].vertices[1];
v2=graph3->edges[i].vertices[2]; v2=graph3->edges[i].vertices[2];
if(graph3->vert_degree[v0]==1 || if(graph3->vert_degree[v0]==1 ||
graph3->vert_degree[v1]==1 || graph3->vert_degree[v1]==1 ||
graph3->vert_degree[v2]==1){ graph3->vert_degree[v2]==1){
if(!GETBIT(marked_edge,i)) { if(!GETBIT(marked_edge,i)) {
@ -196,7 +196,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
queue[queue_head++]=tmp_edge; queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge); SETBIT(marked_edge,tmp_edge);
}; };
}; };
if(graph3->vert_degree[v1]==1) { if(graph3->vert_degree[v1]==1) {
tmp_edge=graph3->first_edge[v1]; tmp_edge=graph3->first_edge[v1];
@ -204,7 +204,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
queue[queue_head++]=tmp_edge; queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge); SETBIT(marked_edge,tmp_edge);
}; };
}; };
if(graph3->vert_degree[v2]==1){ if(graph3->vert_degree[v2]==1){
tmp_edge=graph3->first_edge[v2]; tmp_edge=graph3->first_edge[v2];
@ -227,7 +227,7 @@ bdz_config_data_t *bdz_config_new(void)
{ {
bdz_config_data_t *bdz; bdz_config_data_t *bdz;
bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t)); bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
assert(bdz); if (!bdz) return NULL;
memset(bdz, 0, sizeof(bdz_config_data_t)); memset(bdz, 0, sizeof(bdz_config_data_t));
bdz->hashfunc = CMPH_HASH_JENKINS; bdz->hashfunc = CMPH_HASH_JENKINS;
bdz->g = NULL; bdz->g = NULL;
@ -328,10 +328,10 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations); fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
bdz_free_queue(&edges); bdz_free_queue(&edges);
@ -353,7 +353,7 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n); fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
} }
ranking(bdz); ranking(bdz);
#ifdef CMPH_TIMING #ifdef CMPH_TIMING
ELAPSED_TIME_IN_SECONDS(&construction_time); ELAPSED_TIME_IN_SECONDS(&construction_time);
#endif #endif
mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf = (cmph_t *)malloc(sizeof(cmph_t));
@ -381,17 +381,17 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
} }
#ifdef CMPH_TIMING #ifdef CMPH_TIMING
register cmph_uint32 space_usage = bdz_packed_size(mphf)*8; register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
register cmph_uint32 keys_per_bucket = 1; register cmph_uint32 keys_per_bucket = 1;
construction_time = construction_time - construction_time_begin; construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m); fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
#endif #endif
return mphf; return mphf;
} }
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue) static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
{ {
cmph_uint32 e; cmph_uint32 e;
@ -405,7 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
cmph_uint32 h0, h1, h2; cmph_uint32 h0, h1, h2;
cmph_uint32 keylen; cmph_uint32 keylen;
char *key = NULL; char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
hash_vector(bdz->hl, key, keylen,hl); hash_vector(bdz->hl, key, keylen,hl);
h0 = hl[0] % bdz->r; h0 = hl[0] % bdz->r;
h1 = hl[1] % bdz->r + bdz->r; h1 = hl[1] % bdz->r + bdz->r;
@ -414,7 +414,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
bdz_add_edge(graph3,h0,h1,h2); bdz_add_edge(graph3,h0,h1,h2);
} }
cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3); cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);
return (cycles == 0); return (cycles == 0);
} }
@ -426,7 +426,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
cmph_uint32 v0,v1,v2; cmph_uint32 v0,v1,v2;
cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1); cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1);
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0); cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8)); bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1); memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1);
memset(bdz->g, 0xff, (size_t)(sizeg)); memset(bdz->g, 0xff, (size_t)(sizeg));
@ -439,12 +439,12 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
if(!GETBIT(marked_vertices, v0)){ if(!GETBIT(marked_vertices, v0)){
if(!GETBIT(marked_vertices,v1)) if(!GETBIT(marked_vertices,v1))
{ {
SETVALUE1(bdz->g, v1, UNASSIGNED); SETVALUE1(bdz->g, v1, UNASSIGNED);
SETBIT(marked_vertices, v1); SETBIT(marked_vertices, v1);
} }
if(!GETBIT(marked_vertices,v2)) if(!GETBIT(marked_vertices,v2))
{ {
SETVALUE1(bdz->g, v2, UNASSIGNED); SETVALUE1(bdz->g, v2, UNASSIGNED);
SETBIT(marked_vertices, v2); SETBIT(marked_vertices, v2);
} }
SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3); SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
@ -507,7 +507,7 @@ int bdz_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0); cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0);
nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd); nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
@ -541,12 +541,12 @@ void bdz_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f); nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
bdz->hl = hash_state_load(buf, buflen); bdz->hl = hash_state_load(buf, buflen);
free(buf); free(buf);
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
sizeg = (cmph_uint32)ceil(bdz->n/4.0); sizeg = (cmph_uint32)ceil(bdz->n/4.0);
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8)); bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f); nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
@ -566,7 +566,7 @@ void bdz_load(FILE *f, cmph_t *mphf)
#endif #endif
return; return;
} }
static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex) static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
{ {
@ -578,17 +578,17 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint
while(beg_idx_b < end_idx_b) while(beg_idx_b < end_idx_b)
{ {
base_rank += bdz_lookup_table[*(g + beg_idx_b++)]; base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
} }
DEBUGP("base rank %u\n", base_rank); DEBUGP("base rank %u\n", base_rank);
beg_idx_v = beg_idx_b << 2; beg_idx_v = beg_idx_b << 2;
DEBUGP("beg_idx_v %u\n", beg_idx_v); DEBUGP("beg_idx_v %u\n", beg_idx_v);
while(beg_idx_v < vertex) while(beg_idx_v < vertex)
{ {
if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++; if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
beg_idx_v++; beg_idx_v++;
} }
return base_rank; return base_rank;
} }
@ -610,7 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bdz_destroy(cmph_t *mphf) void bdz_destroy(cmph_t *mphf)
{ {
bdz_data_t *data = (bdz_data_t *)mphf->data; bdz_data_t *data = (bdz_data_t *)mphf->data;
free(data->g); free(data->g);
hash_state_destroy(data->hl); hash_state_destroy(data->hl);
free(data->ranktable); free(data->ranktable);
free(data); free(data);
@ -660,18 +660,18 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 bdz_packed_size(cmph_t *mphf) cmph_uint32 bdz_packed_size(cmph_t *mphf)
{ {
bdz_data_t *data = (bdz_data_t *)mphf->data; bdz_data_t *data = (bdz_data_t *)mphf->data;
CMPH_HASH hl_type = hash_get_type(data->hl); CMPH_HASH hl_type = hash_get_type(data->hl);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0))); return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0)));
} }
/** cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); /** cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key length in bytes * \param keylen key length in bytes
@ -679,13 +679,13 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf)
*/ */
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{ {
register cmph_uint32 vertex; register cmph_uint32 vertex;
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type)); register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
register cmph_uint32 r = *ranktable++; register cmph_uint32 r = *ranktable++;
register cmph_uint32 ranktablesize = *ranktable++; register cmph_uint32 ranktablesize = *ranktable++;
register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize); register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);

View File

@ -24,7 +24,7 @@ static cmph_uint8 lookup_table[5][256] = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}; };
typedef struct typedef struct
{ {
cmph_uint32 vertices[3]; cmph_uint32 vertices[3];
cmph_uint32 next_edges[3]; cmph_uint32 next_edges[3];
@ -41,12 +41,12 @@ static void bdz_ph_free_queue(bdz_ph_queue_t * queue)
free(*queue); free(*queue);
}; };
typedef struct typedef struct
{ {
cmph_uint32 nedges; cmph_uint32 nedges;
bdz_ph_edge_t * edges; bdz_ph_edge_t * edges;
cmph_uint32 * first_edge; cmph_uint32 * first_edge;
cmph_uint8 * vert_degree; cmph_uint8 * vert_degree;
}bdz_ph_graph3_t; }bdz_ph_graph3_t;
@ -54,7 +54,7 @@ static void bdz_ph_alloc_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cm
{ {
graph3->edges=malloc(nedges*sizeof(bdz_ph_edge_t)); graph3->edges=malloc(nedges*sizeof(bdz_ph_edge_t));
graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32)); graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
graph3->vert_degree=malloc((size_t)nvertices); graph3->vert_degree=malloc((size_t)nvertices);
}; };
static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices) static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{ {
@ -101,10 +101,10 @@ static void bdz_ph_dump_graph(bdz_ph_graph3_t* graph3, cmph_uint32 nedges, cmph_
printf(" nexts %d %d %d",graph3->edges[i].next_edges[0], printf(" nexts %d %d %d",graph3->edges[i].next_edges[0],
graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]); graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
}; };
for(i=0;i<nvertices;i++){ for(i=0;i<nvertices;i++){
printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]); printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]);
}; };
}; };
@ -121,7 +121,7 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
j=0; j=0;
} else if(graph3->edges[edge1].vertices[1]==vert){ } else if(graph3->edges[edge1].vertices[1]==vert){
j=1; j=1;
} else } else
j=2; j=2;
edge1=graph3->edges[edge1].next_edges[j]; edge1=graph3->edges[edge1].next_edges[j];
}; };
@ -130,16 +130,16 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4); bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
exit(-1); exit(-1);
}; };
if(edge2!=NULL_EDGE){ if(edge2!=NULL_EDGE){
graph3->edges[edge2].next_edges[j] = graph3->edges[edge2].next_edges[j] =
graph3->edges[edge1].next_edges[i]; graph3->edges[edge1].next_edges[i];
} else } else
graph3->first_edge[vert]= graph3->first_edge[vert]=
graph3->edges[edge1].next_edges[i]; graph3->edges[edge1].next_edges[i];
graph3->vert_degree[vert]--; graph3->vert_degree[vert]--;
}; };
}; };
static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3) static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3)
@ -176,7 +176,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_
queue[queue_head++]=tmp_edge; queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge); SETBIT(marked_edge,tmp_edge);
}; };
}; };
if(graph3->vert_degree[v1]==1) { if(graph3->vert_degree[v1]==1) {
tmp_edge=graph3->first_edge[v1]; tmp_edge=graph3->first_edge[v1];
@ -184,7 +184,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_
queue[queue_head++]=tmp_edge; queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge); SETBIT(marked_edge,tmp_edge);
}; };
}; };
if(graph3->vert_degree[v2]==1){ if(graph3->vert_degree[v2]==1){
tmp_edge=graph3->first_edge[v2]; tmp_edge=graph3->first_edge[v2];
@ -229,7 +229,7 @@ void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 1) break; //bdz_ph only uses one linear hash function if (i >= 1) break; //bdz_ph only uses one linear hash function
bdz_ph->hashfunc = *hashptr; bdz_ph->hashfunc = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -251,16 +251,16 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
if (c == 0) c = 1.23; // validating restrictions over parameter c. if (c == 0) c = 1.23; // validating restrictions over parameter c.
DEBUGP("c: %f\n", c); DEBUGP("c: %f\n", c);
bdz_ph->m = mph->key_source->nkeys; bdz_ph->m = mph->key_source->nkeys;
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3); bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1; if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
bdz_ph->n = 3*bdz_ph->r; bdz_ph->n = 3*bdz_ph->r;
bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n); bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n);
bdz_ph_alloc_queue(&edges,bdz_ph->m); bdz_ph_alloc_queue(&edges,bdz_ph->m);
DEBUGP("Created hypergraph\n"); DEBUGP("Created hypergraph\n");
DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c); DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c);
// Mapping step // Mapping step
@ -287,10 +287,10 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations); fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
// free(bdz_ph->g); // free(bdz_ph->g);
@ -308,7 +308,7 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
bdz_ph_free_queue(&edges); bdz_ph_free_queue(&edges);
bdz_ph_free_graph3(&graph3); bdz_ph_free_graph3(&graph3);
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Starting optimization step\n"); fprintf(stderr, "Starting optimization step\n");
@ -338,23 +338,23 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
fprintf(stderr, "Successfully generated minimal perfect hash function\n"); fprintf(stderr, "Successfully generated minimal perfect hash function\n");
} }
#ifdef CMPH_TIMING #ifdef CMPH_TIMING
register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8; register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8;
register cmph_uint32 keys_per_bucket = 1; register cmph_uint32 keys_per_bucket = 1;
construction_time = construction_time - construction_time_begin; construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m); fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m);
#endif #endif
return mphf; return mphf;
} }
static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue) static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
{ {
cmph_uint32 e; cmph_uint32 e;
int cycles = 0; int cycles = 0;
cmph_uint32 hl[3]; cmph_uint32 hl[3];
bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data; bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n); bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
@ -363,7 +363,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu
cmph_uint32 h0, h1, h2; cmph_uint32 h0, h1, h2;
cmph_uint32 keylen; cmph_uint32 keylen;
char *key = NULL; char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
hash_vector(bdz_ph->hl, key, keylen, hl); hash_vector(bdz_ph->hl, key, keylen, hl);
h0 = hl[0] % bdz_ph->r; h0 = hl[0] % bdz_ph->r;
h1 = hl[1] % bdz_ph->r + bdz_ph->r; h1 = hl[1] % bdz_ph->r + bdz_ph->r;
@ -371,7 +371,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
bdz_ph_add_edge(graph3,h0,h1,h2); bdz_ph_add_edge(graph3,h0,h1,h2);
} }
cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3); cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3);
return (cycles == 0); return (cycles == 0);
} }
@ -383,7 +383,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
cmph_uint32 v0,v1,v2; cmph_uint32 v0,v1,v2;
cmph_uint8 * marked_vertices =malloc((size_t)(bdz_ph->n >> 3) + 1); cmph_uint8 * marked_vertices =malloc((size_t)(bdz_ph->n >> 3) + 1);
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0); cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0);
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
memset(marked_vertices, 0, (size_t)(bdz_ph->n >> 3) + 1); memset(marked_vertices, 0, (size_t)(bdz_ph->n >> 3) + 1);
//memset(bdz_ph->g, 0xff, sizeg); //memset(bdz_ph->g, 0xff, sizeg);
@ -396,14 +396,14 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
if(!GETBIT(marked_vertices, v0)){ if(!GETBIT(marked_vertices, v0)){
if(!GETBIT(marked_vertices,v1)) if(!GETBIT(marked_vertices,v1))
{ {
//SETVALUE(bdz_ph->g, v1, UNASSIGNED); //SETVALUE(bdz_ph->g, v1, UNASSIGNED);
SETBIT(marked_vertices, v1); SETBIT(marked_vertices, v1);
} }
if(!GETBIT(marked_vertices,v2)) if(!GETBIT(marked_vertices,v2))
{ {
//SETVALUE(bdz_ph->g, v2, UNASSIGNED); //SETVALUE(bdz_ph->g, v2, UNASSIGNED);
SETBIT(marked_vertices, v2); SETBIT(marked_vertices, v2);
} }
SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3); SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3);
SETBIT(marked_vertices, v0); SETBIT(marked_vertices, v0);
} else if(!GETBIT(marked_vertices, v1)) { } else if(!GETBIT(marked_vertices, v1)) {
@ -417,7 +417,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
}else { }else {
SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3); SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3);
SETBIT(marked_vertices, v2); SETBIT(marked_vertices, v2);
} }
DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2)); DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
}; };
free(marked_vertices); free(marked_vertices);
@ -428,11 +428,11 @@ static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph)
cmph_uint32 i; cmph_uint32 i;
cmph_uint8 byte = 0; cmph_uint8 byte = 0;
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0); cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
cmph_uint8 value; cmph_uint8 value;
cmph_uint32 idx; cmph_uint32 idx;
for(i = 0; i < bdz_ph->n; i++) for(i = 0; i < bdz_ph->n; i++)
{ {
idx = i/5; idx = i/5;
byte = new_g[idx]; byte = new_g[idx];
value = GETVALUE(bdz_ph->g, i); value = GETVALUE(bdz_ph->g, i);
@ -462,7 +462,7 @@ int bdz_ph_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
sizeg = (cmph_uint32)ceil(data->n/5.0); sizeg = (cmph_uint32)ceil(data->n/5.0);
nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd); nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
#ifdef DEBUG #ifdef DEBUG
@ -491,19 +491,19 @@ void bdz_ph_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f); nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
bdz_ph->hl = hash_state_load(buf, buflen); bdz_ph->hl = hash_state_load(buf, buflen);
free(buf); free(buf);
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);
sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0); sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f); nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
return; return;
} }
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{ {
@ -520,12 +520,12 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
byte0 = bdz_ph->g[hl[0]/5]; byte0 = bdz_ph->g[hl[0]/5];
byte1 = bdz_ph->g[hl[1]/5]; byte1 = bdz_ph->g[hl[1]/5];
byte2 = bdz_ph->g[hl[2]/5]; byte2 = bdz_ph->g[hl[2]/5];
byte0 = lookup_table[hl[0]%5U][byte0]; byte0 = lookup_table[hl[0]%5U][byte0];
byte1 = lookup_table[hl[1]%5U][byte1]; byte1 = lookup_table[hl[1]%5U][byte1];
byte2 = lookup_table[hl[2]%5U][byte2]; byte2 = lookup_table[hl[2]%5U][byte2];
vertex = hl[(byte0 + byte1 + byte2)%3]; vertex = hl[(byte0 + byte1 + byte2)%3];
return vertex; return vertex;
} }
@ -533,7 +533,7 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bdz_ph_destroy(cmph_t *mphf) void bdz_ph_destroy(cmph_t *mphf)
{ {
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data; bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
free(data->g); free(data->g);
hash_state_destroy(data->hl); hash_state_destroy(data->hl);
free(data); free(data);
free(mphf); free(mphf);
@ -571,17 +571,17 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf) cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
{ {
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data; bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
CMPH_HASH hl_type = hash_get_type(data->hl); CMPH_HASH hl_type = hash_get_type(data->hl);
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0); cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
return (cmph_uint32) (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*sizeg); return (cmph_uint32) (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*sizeg);
} }
/** cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); /** cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key length in bytes * \param keylen key length in bytes
@ -589,21 +589,21 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
*/ */
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{ {
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type); register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type);
register cmph_uint32 r = *((cmph_uint32*) ptr); register cmph_uint32 r = *((cmph_uint32*) ptr);
register cmph_uint8 * g = ptr + 4; register cmph_uint8 * g = ptr + 4;
cmph_uint32 hl[3]; cmph_uint32 hl[3];
register cmph_uint8 byte0, byte1, byte2; register cmph_uint8 byte0, byte1, byte2;
register cmph_uint32 vertex; register cmph_uint32 vertex;
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl); hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
hl[0] = hl[0] % r; hl[0] = hl[0] % r;
hl[1] = hl[1] % r + r; hl[1] = hl[1] % r + r;
hl[2] = hl[2] % r + (r << 1); hl[2] = hl[2] % r + (r << 1);
@ -611,11 +611,11 @@ cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
byte0 = g[hl[0]/5]; byte0 = g[hl[0]/5];
byte1 = g[hl[1]/5]; byte1 = g[hl[1]/5];
byte2 = g[hl[2]/5]; byte2 = g[hl[2]/5];
byte0 = lookup_table[hl[0]%5][byte0]; byte0 = lookup_table[hl[0]%5][byte0];
byte1 = lookup_table[hl[1]%5][byte1]; byte1 = lookup_table[hl[1]%5][byte1];
byte2 = lookup_table[hl[2]%5][byte2]; byte2 = lookup_table[hl[2]%5][byte2];
vertex = hl[(byte0 + byte1 + byte2)%3]; vertex = hl[(byte0 + byte1 + byte2)%3];
return vertex; return vertex;
} }

View File

@ -128,4 +128,3 @@ int main(int argc, char** argv) {
lsmap_destroy(g_created_mphf); lsmap_destroy(g_created_mphf);
return 0; return 0;
} }

116
src/bmz.c
View File

@ -24,7 +24,7 @@ bmz_config_data_t *bmz_config_new(void)
{ {
bmz_config_data_t *bmz = NULL; bmz_config_data_t *bmz = NULL;
bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t)); bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));
assert(bmz); if (!bmz) return NULL;
memset(bmz, 0, sizeof(bmz_config_data_t)); memset(bmz, 0, sizeof(bmz_config_data_t));
bmz->hashfuncs[0] = CMPH_HASH_JENKINS; bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
bmz->hashfuncs[1] = CMPH_HASH_JENKINS; bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -49,7 +49,7 @@ void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 2) break; //bmz only uses two hash functions if (i >= 2) break; //bmz only uses two hash functions
bmz->hashfuncs[i] = *hashptr; bmz->hashfuncs[i] = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -68,8 +68,8 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data; bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
if (c == 0) c = 1.15; // validating restrictions over parameter c. if (c == 0) c = 1.15; // validating restrictions over parameter c.
DEBUGP("c: %f\n", c); DEBUGP("c: %f\n", c);
bmz->m = mph->key_source->nkeys; bmz->m = mph->key_source->nkeys;
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys); bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
bmz->graph = graph_new(bmz->n, bmz->m); bmz->graph = graph_new(bmz->n, bmz->m);
DEBUGP("Created graph\n"); DEBUGP("Created graph\n");
@ -81,7 +81,7 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
{ {
// Mapping step // Mapping step
cmph_uint32 biggest_g_value = 0; cmph_uint32 biggest_g_value = 0;
cmph_uint32 biggest_edge_value = 1; cmph_uint32 biggest_edge_value = 1;
iterations = 100; iterations = 100;
if (mph->verbosity) if (mph->verbosity)
{ {
@ -109,12 +109,12 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations); fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
graph_destroy(bmz->graph); graph_destroy(bmz->graph);
return NULL; return NULL;
} }
// Ordering step // Ordering step
@ -155,17 +155,17 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
} }
bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes
} }
else else
{ {
iterations_map--; iterations_map--;
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map); if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
} }
free(used_edges); free(used_edges);
free(visited); free(visited);
} while(restart_mapping && iterations_map > 0); } while(restart_mapping && iterations_map > 0);
graph_destroy(bmz->graph); graph_destroy(bmz->graph);
bmz->graph = NULL; bmz->graph = NULL;
if (iterations_map == 0) if (iterations_map == 0)
{ {
return NULL; return NULL;
} }
@ -212,15 +212,15 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
while(!vqueue_is_empty(q)) while(!vqueue_is_empty(q))
{ {
v = vqueue_remove(q); v = vqueue_remove(q);
it = graph_neighbors_it(bmz->graph, v); it = graph_neighbors_it(bmz->graph, v);
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u))) if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
{ {
collision = 1; collision = 1;
while(collision) // lookahead to resolve collisions while(collision) // lookahead to resolve collisions
{ {
next_g = *biggest_g_value + 1; next_g = *biggest_g_value + 1;
it1 = graph_neighbors_it(bmz->graph, u); it1 = graph_neighbors_it(bmz->graph, u);
collision = 0; collision = 0;
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -232,7 +232,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
vqueue_destroy(q); vqueue_destroy(q);
return 1; // restart mapping step. return 1; // restart mapping step.
} }
if (GETBIT(used_edges, (next_g + bmz->g[lav]))) if (GETBIT(used_edges, (next_g + bmz->g[lav])))
{ {
collision = 1; collision = 1;
break; break;
@ -240,7 +240,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
} }
} }
if (next_g > *biggest_g_value) *biggest_g_value = next_g; if (next_g > *biggest_g_value) *biggest_g_value = next_g;
} }
// Marking used edges... // Marking used edges...
it1 = graph_neighbors_it(bmz->graph, u); it1 = graph_neighbors_it(bmz->graph, u);
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -254,9 +254,9 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
bmz->g[u] = next_g; // Labelling vertex u. bmz->g[u] = next_g; // Labelling vertex u.
SETBIT(visited,u); SETBIT(visited,u);
vqueue_insert(q, u); vqueue_insert(q, u);
} }
} }
} }
vqueue_destroy(q); vqueue_destroy(q);
return 0; return 0;
@ -282,22 +282,22 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
while(!vqueue_is_empty(q)) while(!vqueue_is_empty(q))
{ {
v = vqueue_remove(q); v = vqueue_remove(q);
it = graph_neighbors_it(bmz->graph, v); it = graph_neighbors_it(bmz->graph, v);
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u))) if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
{ {
cmph_uint32 next_g_index = 0; cmph_uint32 next_g_index = 0;
collision = 1; collision = 1;
while(collision) // lookahead to resolve collisions while(collision) // lookahead to resolve collisions
{ {
if (next_g_index < nunused_g_values) if (next_g_index < nunused_g_values)
{ {
next_g = unused_g_values[next_g_index++]; next_g = unused_g_values[next_g_index++];
} }
else else
{ {
next_g = *biggest_g_value + 1; next_g = *biggest_g_value + 1;
next_g_index = UINT_MAX; next_g_index = UINT_MAX;
} }
it1 = graph_neighbors_it(bmz->graph, u); it1 = graph_neighbors_it(bmz->graph, u);
@ -312,7 +312,7 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
free(unused_g_values); free(unused_g_values);
return 1; // restart mapping step. return 1; // restart mapping step.
} }
if (GETBIT(used_edges, (next_g + bmz->g[lav]))) if (GETBIT(used_edges, (next_g + bmz->g[lav])))
{ {
collision = 1; collision = 1;
break; break;
@ -324,13 +324,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
if(nunused_g_values == unused_g_values_capacity) if(nunused_g_values == unused_g_values_capacity)
{ {
unused_g_values = (cmph_uint32 *)realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32)); unused_g_values = (cmph_uint32 *)realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32));
unused_g_values_capacity += BUFSIZ; unused_g_values_capacity += BUFSIZ;
} }
unused_g_values[nunused_g_values++] = next_g; unused_g_values[nunused_g_values++] = next_g;
} }
if (next_g > *biggest_g_value) *biggest_g_value = next_g; if (next_g > *biggest_g_value) *biggest_g_value = next_g;
} }
next_g_index--; next_g_index--;
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values]; if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
@ -347,13 +347,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
bmz->g[u] = next_g; // Labelling vertex u. bmz->g[u] = next_g; // Labelling vertex u.
SETBIT(visited, u); SETBIT(visited, u);
vqueue_insert(q, u); vqueue_insert(q, u);
} }
} }
} }
vqueue_destroy(q); vqueue_destroy(q);
free(unused_g_values); free(unused_g_values);
return 0; return 0;
} }
static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index) static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
@ -381,8 +381,8 @@ static void bmz_traverse(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_u
SETBIT(visited, neighbor); SETBIT(visited, neighbor);
(*unused_edge_index)++; (*unused_edge_index)++;
bmz_traverse(bmz, used_edges, neighbor, unused_edge_index, visited); bmz_traverse(bmz, used_edges, neighbor, unused_edge_index, visited);
} }
} }
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited) static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited)
@ -394,7 +394,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
{ {
v1 = graph_vertex_id(bmz->graph, i, 0); v1 = graph_vertex_id(bmz->graph, i, 0);
v2 = graph_vertex_id(bmz->graph, i, 1); v2 = graph_vertex_id(bmz->graph, i, 1);
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited); if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited);
else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited); else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited);
@ -403,7 +403,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
for(i = 0; i < bmz->n; i++) for(i = 0; i < bmz->n; i++)
{ {
if(!GETBIT(visited,i)) if(!GETBIT(visited,i))
{ {
bmz->g[i] = 0; bmz->g[i] = 0;
SETBIT(visited, i); SETBIT(visited, i);
bmz_traverse(bmz, used_edges, i, &unused_edge_index, visited); bmz_traverse(bmz, used_edges, i, &unused_edge_index, visited);
@ -411,14 +411,14 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
} }
} }
static int bmz_gen_edges(cmph_config_t *mph) static int bmz_gen_edges(cmph_config_t *mph)
{ {
cmph_uint32 e; cmph_uint32 e;
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data; bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
cmph_uint8 multiple_edges = 0; cmph_uint8 multiple_edges = 0;
DEBUGP("Generating edges for %u vertices\n", bmz->n); DEBUGP("Generating edges for %u vertices\n", bmz->n);
graph_clear_edges(bmz->graph); graph_clear_edges(bmz->graph);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e) for (e = 0; e < mph->key_source->nkeys; ++e)
{ {
@ -426,12 +426,12 @@ static int bmz_gen_edges(cmph_config_t *mph)
cmph_uint32 keylen; cmph_uint32 keylen;
char *key = NULL; char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
h1 = hash(bmz->hashes[0], key, keylen) % bmz->n; h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
h2 = hash(bmz->hashes[1], key, keylen) % bmz->n; h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
if (h1 == h2) if (++h2 >= bmz->n) h2 = 0; if (h1 == h2) if (++h2 >= bmz->n) h2 = 0;
DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2); DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
if (h1 == h2) if (h1 == h2)
{ {
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -472,7 +472,7 @@ int bmz_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->n), (size_t)1, fd); nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->n), (size_t)1, fd);
#ifdef DEBUG #ifdef DEBUG
cmph_uint32 i; cmph_uint32 i;
@ -510,8 +510,8 @@ void bmz_load(FILE *f, cmph_t *mphf)
} }
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f);
bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n); bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n);
nbytes = fread(bmz->g, bmz->n*sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(bmz->g, bmz->n*sizeof(cmph_uint32), (size_t)1, f);
@ -522,7 +522,7 @@ void bmz_load(FILE *f, cmph_t *mphf)
#endif #endif
return; return;
} }
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{ {
@ -537,7 +537,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bmz_destroy(cmph_t *mphf) void bmz_destroy(cmph_t *mphf)
{ {
bmz_data_t *data = (bmz_data_t *)mphf->data; bmz_data_t *data = (bmz_data_t *)mphf->data;
free(data->g); free(data->g);
hash_state_destroy(data->hashes[0]); hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]); hash_state_destroy(data->hashes[1]);
free(data->hashes); free(data->hashes);
@ -548,7 +548,7 @@ void bmz_destroy(cmph_t *mphf)
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf); /** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf * \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/ */
void bmz_pack(cmph_t *mphf, void *packed_mphf) void bmz_pack(cmph_t *mphf, void *packed_mphf)
{ {
@ -579,26 +579,26 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf)
ptr += sizeof(data->n); ptr += sizeof(data->n);
// packing g // packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
} }
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf); /** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 bmz_packed_size(cmph_t *mphf) cmph_uint32 bmz_packed_size(cmph_t *mphf)
{ {
bmz_data_t *data = (bmz_data_t *)mphf->data; bmz_data_t *data = (bmz_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->hashes[0]); CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]); CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
} }
/** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); /** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key legth in bytes * \param keylen key legth in bytes
@ -613,13 +613,13 @@ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4; h2_ptr += 4;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 n = *g_ptr++; register cmph_uint32 n = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
if (h1 == h2 && ++h2 > n) h2 = 0; if (h1 == h2 && ++h2 > n) h2 = 0;
return (g_ptr[h1] + g_ptr[h2]); return (g_ptr[h1] + g_ptr[h2]);
} }

View File

@ -23,7 +23,7 @@ bmz8_config_data_t *bmz8_config_new(void)
{ {
bmz8_config_data_t *bmz8; bmz8_config_data_t *bmz8;
bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t)); bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t));
assert(bmz8); if (!bmz8) return NULL;
memset(bmz8, 0, sizeof(bmz8_config_data_t)); memset(bmz8, 0, sizeof(bmz8_config_data_t));
bmz8->hashfuncs[0] = CMPH_HASH_JENKINS; bmz8->hashfuncs[0] = CMPH_HASH_JENKINS;
bmz8->hashfuncs[1] = CMPH_HASH_JENKINS; bmz8->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -48,7 +48,7 @@ void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 2) break; //bmz8 only uses two hash functions if (i >= 2) break; //bmz8 only uses two hash functions
bmz8->hashfuncs[i] = *hashptr; bmz8->hashfuncs[i] = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -64,7 +64,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
cmph_uint8 restart_mapping = 0; cmph_uint8 restart_mapping = 0;
cmph_uint8 * visited = NULL; cmph_uint8 * visited = NULL;
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data; bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
if (mph->key_source->nkeys >= 256) if (mph->key_source->nkeys >= 256)
{ {
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n"); if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
@ -72,8 +72,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
} }
if (c == 0) c = 1.15; // validating restrictions over parameter c. if (c == 0) c = 1.15; // validating restrictions over parameter c.
DEBUGP("c: %f\n", c); DEBUGP("c: %f\n", c);
bmz8->m = (cmph_uint8) mph->key_source->nkeys; bmz8->m = (cmph_uint8) mph->key_source->nkeys;
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys); bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
bmz8->graph = graph_new(bmz8->n, bmz8->m); bmz8->graph = graph_new(bmz8->n, bmz8->m);
DEBUGP("Created graph\n"); DEBUGP("Created graph\n");
@ -113,8 +113,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations); fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
@ -161,19 +161,19 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
} }
bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
} }
else else
{ {
iterations_map--; iterations_map--;
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map); if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
} }
free(used_edges); free(used_edges);
free(visited); free(visited);
}while(restart_mapping && iterations_map > 0); }while(restart_mapping && iterations_map > 0);
graph_destroy(bmz8->graph); graph_destroy(bmz8->graph);
bmz8->graph = NULL; bmz8->graph = NULL;
if (iterations_map == 0) if (iterations_map == 0)
{ {
return NULL; return NULL;
} }
@ -213,15 +213,15 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
while(!vqueue_is_empty(q)) while(!vqueue_is_empty(q))
{ {
v = vqueue_remove(q); v = vqueue_remove(q);
it = graph_neighbors_it(bmz8->graph, v); it = graph_neighbors_it(bmz8->graph, v);
while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR) while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u))) if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
{ {
collision = 1; collision = 1;
while(collision) // lookahead to resolve collisions while(collision) // lookahead to resolve collisions
{ {
next_g = (cmph_uint8)(*biggest_g_value + 1); next_g = (cmph_uint8)(*biggest_g_value + 1);
it1 = graph_neighbors_it(bmz8->graph, u); it1 = graph_neighbors_it(bmz8->graph, u);
collision = 0; collision = 0;
while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR) while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -233,7 +233,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
vqueue_destroy(q); vqueue_destroy(q);
return 1; // restart mapping step. return 1; // restart mapping step.
} }
if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
{ {
collision = 1; collision = 1;
break; break;
@ -241,7 +241,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
} }
} }
if (next_g > *biggest_g_value) *biggest_g_value = next_g; if (next_g > *biggest_g_value) *biggest_g_value = next_g;
} }
// Marking used edges... // Marking used edges...
it1 = graph_neighbors_it(bmz8->graph, u); it1 = graph_neighbors_it(bmz8->graph, u);
while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR) while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -250,16 +250,16 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
{ {
SETBIT(used_edges,(next_g + bmz8->g[lav])); SETBIT(used_edges,(next_g + bmz8->g[lav]));
if(next_g + bmz8->g[lav] > *biggest_edge_value) if(next_g + bmz8->g[lav] > *biggest_edge_value)
*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]); *biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
} }
} }
bmz8->g[u] = next_g; // Labelling vertex u. bmz8->g[u] = next_g; // Labelling vertex u.
SETBIT(visited,u); SETBIT(visited,u);
vqueue_insert(q, u); vqueue_insert(q, u);
} }
} }
} }
vqueue_destroy(q); vqueue_destroy(q);
return 0; return 0;
@ -268,8 +268,8 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited) static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{ {
cmph_uint8 next_g; cmph_uint8 next_g;
cmph_uint32 u; cmph_uint32 u;
cmph_uint32 lav; cmph_uint32 lav;
cmph_uint8 collision; cmph_uint8 collision;
cmph_uint8 * unused_g_values = NULL; cmph_uint8 * unused_g_values = NULL;
cmph_uint8 unused_g_values_capacity = 0; cmph_uint8 unused_g_values_capacity = 0;
@ -280,27 +280,27 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
DEBUGP("Labelling critical vertices\n"); DEBUGP("Labelling critical vertices\n");
bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1); bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
SETBIT(visited, v); SETBIT(visited, v);
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2)); next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2));
vqueue_insert(q, v); vqueue_insert(q, v);
while(!vqueue_is_empty(q)) while(!vqueue_is_empty(q))
{ {
v = vqueue_remove(q); v = vqueue_remove(q);
it = graph_neighbors_it(bmz8->graph, v); it = graph_neighbors_it(bmz8->graph, v);
while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR) while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u))) if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
{ {
cmph_uint8 next_g_index = 0; cmph_uint8 next_g_index = 0;
collision = 1; collision = 1;
while(collision) // lookahead to resolve collisions while(collision) // lookahead to resolve collisions
{ {
if (next_g_index < nunused_g_values) if (next_g_index < nunused_g_values)
{ {
next_g = unused_g_values[next_g_index++]; next_g = unused_g_values[next_g_index++];
} }
else else
{ {
next_g = (cmph_uint8)(*biggest_g_value + 1); next_g = (cmph_uint8)(*biggest_g_value + 1);
next_g_index = 255;//UINT_MAX; next_g_index = 255;//UINT_MAX;
} }
it1 = graph_neighbors_it(bmz8->graph, u); it1 = graph_neighbors_it(bmz8->graph, u);
@ -315,7 +315,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
free(unused_g_values); free(unused_g_values);
return 1; // restart mapping step. return 1; // restart mapping step.
} }
if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
{ {
collision = 1; collision = 1;
break; break;
@ -327,14 +327,14 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
if(nunused_g_values == unused_g_values_capacity) if(nunused_g_values == unused_g_values_capacity)
{ {
unused_g_values = (cmph_uint8*)realloc(unused_g_values, ((size_t)(unused_g_values_capacity + BUFSIZ))*sizeof(cmph_uint8)); unused_g_values = (cmph_uint8*)realloc(unused_g_values, ((size_t)(unused_g_values_capacity + BUFSIZ))*sizeof(cmph_uint8));
unused_g_values_capacity += (cmph_uint8)BUFSIZ; unused_g_values_capacity += (cmph_uint8)BUFSIZ;
} }
unused_g_values[nunused_g_values++] = next_g; unused_g_values[nunused_g_values++] = next_g;
} }
if (next_g > *biggest_g_value) *biggest_g_value = next_g; if (next_g > *biggest_g_value) *biggest_g_value = next_g;
} }
next_g_index--; next_g_index--;
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values]; if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
@ -345,22 +345,22 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav)) if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav))
{ {
SETBIT(used_edges,(next_g + bmz8->g[lav])); SETBIT(used_edges,(next_g + bmz8->g[lav]));
if(next_g + bmz8->g[lav] > *biggest_edge_value) if(next_g + bmz8->g[lav] > *biggest_edge_value)
*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]); *biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
} }
} }
bmz8->g[u] = next_g; // Labelling vertex u. bmz8->g[u] = next_g; // Labelling vertex u.
SETBIT(visited, u); SETBIT(visited, u);
vqueue_insert(q, u); vqueue_insert(q, u);
} }
} }
} }
vqueue_destroy(q); vqueue_destroy(q);
free(unused_g_values); free(unused_g_values);
return 0; return 0;
} }
static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index) static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
@ -388,8 +388,8 @@ static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmp
SETBIT(visited, neighbor); SETBIT(visited, neighbor);
(*unused_edge_index)++; (*unused_edge_index)++;
bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited); bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited);
} }
} }
static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited) static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited)
@ -401,7 +401,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
{ {
v1 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 0); v1 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 0);
v2 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 1); v2 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 1);
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
if(GETBIT(visited,v1)) bmz8_traverse(bmz8, used_edges, v1, &unused_edge_index, visited); if(GETBIT(visited,v1)) bmz8_traverse(bmz8, used_edges, v1, &unused_edge_index, visited);
else bmz8_traverse(bmz8, used_edges, v2, &unused_edge_index, visited); else bmz8_traverse(bmz8, used_edges, v2, &unused_edge_index, visited);
@ -410,7 +410,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
for(i = 0; i < bmz8->n; i++) for(i = 0; i < bmz8->n; i++)
{ {
if(!GETBIT(visited,i)) if(!GETBIT(visited,i))
{ {
bmz8->g[i] = 0; bmz8->g[i] = 0;
SETBIT(visited, i); SETBIT(visited, i);
bmz8_traverse(bmz8, used_edges, i, &unused_edge_index, visited); bmz8_traverse(bmz8, used_edges, i, &unused_edge_index, visited);
@ -418,14 +418,14 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
} }
} }
static int bmz8_gen_edges(cmph_config_t *mph) static int bmz8_gen_edges(cmph_config_t *mph)
{ {
cmph_uint8 e; cmph_uint8 e;
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data; bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
cmph_uint8 multiple_edges = 0; cmph_uint8 multiple_edges = 0;
DEBUGP("Generating edges for %u vertices\n", bmz8->n); DEBUGP("Generating edges for %u vertices\n", bmz8->n);
graph_clear_edges(bmz8->graph); graph_clear_edges(bmz8->graph);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e) for (e = 0; e < mph->key_source->nkeys; ++e)
{ {
@ -433,12 +433,12 @@ static int bmz8_gen_edges(cmph_config_t *mph)
cmph_uint32 keylen; cmph_uint32 keylen;
char *key = NULL; char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
// if (key == NULL)fprintf(stderr, "key = %s -- read BMZ\n", key); // if (key == NULL)fprintf(stderr, "key = %s -- read BMZ\n", key);
h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n); h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n); h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);
if (h1 == h2) if (++h2 >= bmz8->n) h2 = 0; if (h1 == h2) if (++h2 >= bmz8->n) h2 = 0;
if (h1 == h2) if (h1 == h2)
{ {
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -480,7 +480,7 @@ int bmz8_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd); nbytes = fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd);
nbytes = fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd); nbytes = fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd);
/* #ifdef DEBUG /* #ifdef DEBUG
fprintf(stderr, "G: "); fprintf(stderr, "G: ");
@ -518,8 +518,8 @@ void bmz8_load(FILE *f, cmph_t *mphf)
} }
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f); nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f); nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);
bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n); bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n);
nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f); nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f);
@ -530,7 +530,7 @@ void bmz8_load(FILE *f, cmph_t *mphf)
#endif #endif
return; return;
} }
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{ {
@ -556,7 +556,7 @@ void bmz8_destroy(cmph_t *mphf)
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf); /** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf * \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/ */
void bmz8_pack(cmph_t *mphf, void *packed_mphf) void bmz8_pack(cmph_t *mphf, void *packed_mphf)
{ {
@ -585,26 +585,26 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
*ptr++ = data->n; *ptr++ = data->n;
// packing g // packing g
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n); memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
} }
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf); /** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 bmz8_packed_size(cmph_t *mphf) cmph_uint32 bmz8_packed_size(cmph_t *mphf)
{ {
bmz8_data_t *data = (bmz8_data_t *)mphf->data; bmz8_data_t *data = (bmz8_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->hashes[0]); CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]); CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n); 2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
} }
/** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); /** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
 * \param keylen key length in bytes  * \param keylen key length in bytes
@ -619,14 +619,14 @@ cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4; h2_ptr += 4;
register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type); register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);
register cmph_uint8 n = *g_ptr++; register cmph_uint8 n = *g_ptr++;
register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n); register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n); register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
if (h1 == h2 && ++h2 > n) h2 = 0; if (h1 == h2 && ++h2 > n) h2 = 0;
return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]); return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
} }

190
src/brz.c
View File

@ -26,8 +26,9 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen); static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
brz_config_data_t *brz_config_new(void) brz_config_data_t *brz_config_new(void)
{ {
brz_config_data_t *brz = NULL; brz_config_data_t *brz = NULL;
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t)); brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
if (!brz) return NULL;
brz->algo = CMPH_FCH; brz->algo = CMPH_FCH;
brz->b = 128; brz->b = 128;
brz->hashfuncs[0] = CMPH_HASH_JENKINS; brz->hashfuncs[0] = CMPH_HASH_JENKINS;
@ -42,7 +43,7 @@ brz_config_data_t *brz_config_new(void)
brz->memory_availability = 1024*1024; brz->memory_availability = 1024*1024;
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)10, sizeof(cmph_uint8)); brz->tmp_dir = (cmph_uint8 *)calloc((size_t)10, sizeof(cmph_uint8));
brz->mphf_fd = NULL; brz->mphf_fd = NULL;
strcpy((char *)(brz->tmp_dir), "/var/tmp/"); strcpy((char *)(brz->tmp_dir), "/var/tmp/");
assert(brz); assert(brz);
return brz; return brz;
} }
@ -63,7 +64,7 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 3) break; //brz only uses three hash functions if (i >= 3) break; //brz only uses three hash functions
brz->hashfuncs[i] = *hashptr; brz->hashfuncs[i] = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -84,14 +85,14 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
if(tmp_dir[len-1] != '/') if(tmp_dir[len-1] != '/')
{ {
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+2, sizeof(cmph_uint8)); brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+2, sizeof(cmph_uint8));
sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir); sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir);
} }
else else
{ {
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+1, sizeof(cmph_uint8)); brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+1, sizeof(cmph_uint8));
sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir); sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir);
} }
} }
} }
@ -105,14 +106,14 @@ void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
/* Store the parameter b in the brz configuration attached to mph.
 * A value outside the open interval (64, 175) is replaced by 128
 * before being narrowed to cmph_uint8. */
void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
	brz_config_data_t *cfg = (brz_config_data_t *)mph->data;
	const int in_range = (b > 64 && b < 175);

	cfg->b = (cmph_uint8)(in_range ? b : 128);
}
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{ {
if (algo == CMPH_BMZ8 || algo == CMPH_FCH) // supported algorithms if (algo == CMPH_BMZ8 || algo == CMPH_FCH) // supported algorithms
{ {
@ -147,13 +148,13 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b)); brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
DEBUGP("k: %u\n", brz->k); DEBUGP("k: %u\n", brz->k);
brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8)); brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));
// Clustering the keys by graph id. // Clustering the keys by graph id.
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Partioning the set of keys.\n"); fprintf(stderr, "Partioning the set of keys.\n");
} }
while(1) while(1)
{ {
int ok; int ok;
@ -172,17 +173,17 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations); fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n"); DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
free(brz->size); free(brz->size);
return NULL; return NULL;
} }
DEBUGP("Graphs generated\n"); DEBUGP("Graphs generated\n");
brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32)); brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
for (i = 1; i < brz->k; ++i) for (i = 1; i < brz->k; ++i)
{ {
@ -209,7 +210,7 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
brzf->m = brz->m; brzf->m = brz->m;
brzf->algo = brz->algo; brzf->algo = brz->algo;
mphf->data = brzf; mphf->data = brzf;
mphf->size = brz->m; mphf->size = brz->m;
DEBUGP("Successfully generated minimal perfect hash\n"); DEBUGP("Successfully generated minimal perfect hash\n");
if (mph->verbosity) if (mph->verbosity)
{ {
@ -240,7 +241,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_uint32 cur_bucket = 0; cmph_uint32 cur_bucket = 0;
cmph_uint8 nkeys_vd = 0; cmph_uint8 nkeys_vd = 0;
cmph_uint8 ** keys_vd = NULL; cmph_uint8 ** keys_vd = NULL;
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
DEBUGP("Generating graphs from %u keys\n", brz->m); DEBUGP("Generating graphs from %u keys\n", brz->m);
// Partitioning // Partitioning
@ -249,7 +250,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
/* Buffers management */ /* Buffers management */
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
{ {
if(mph->verbosity) if(mph->verbosity)
{ {
@ -265,8 +266,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
sum += value; sum += value;
value = buckets_size[i]; value = buckets_size[i];
buckets_size[i] = sum; buckets_size[i] = sum;
} }
memory_usage = 0; memory_usage = 0;
keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32)); keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++) for(i = 0; i < nkeys_in_buffer; i++)
@ -298,8 +299,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen); memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen);
memory_usage += keylen + (cmph_uint32)sizeof(keylen); memory_usage += keylen + (cmph_uint32)sizeof(keylen);
h0 = hash(brz->h0, key, keylen) % brz->k; h0 = hash(brz->h0, key, keylen) % brz->k;
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))) if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
{ {
free(buffer); free(buffer);
free(buckets_size); free(buckets_size);
@ -310,8 +311,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
nkeys_in_buffer++; nkeys_in_buffer++;
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
} }
if (memory_usage != 0) // flush buffers if (memory_usage != 0) // flush buffers
{ {
if(mph->verbosity) if(mph->verbosity)
{ {
fprintf(stderr, "Flushing %u\n", nkeys_in_buffer); fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
@ -370,12 +371,12 @@ static int brz_gen_mphf(cmph_config_t *mph)
nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd); nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd);
nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs
nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd); nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd);
//tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *)); //tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
buff_manager = buffer_manager_new(brz->memory_availability, nflushes); buff_manager = buffer_manager_new(brz->memory_availability, nflushes);
buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *)); buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *));
buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32)); buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32));
memory_usage = 0; memory_usage = 0;
for(i = 0; i < nflushes; i++) for(i = 0; i < nflushes; i++)
{ {
@ -388,7 +389,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k; h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
buffer_h0[i] = h0; buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)key; buffer_merge[i] = (cmph_uint8 *)key;
key = NULL; //transfer memory ownership key = NULL; //transfer memory ownership
} }
e = 0; e = 0;
keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *)); keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
@ -429,7 +430,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
e++; e++;
buffer_h0[i] = UINT_MAX; buffer_h0[i] = UINT_MAX;
} }
if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket. if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
{ {
cmph_io_adapter_t *source = NULL; cmph_io_adapter_t *source = NULL;
@ -444,7 +445,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
//cmph_config_set_algo(config, CMPH_BMZ8); //cmph_config_set_algo(config, CMPH_BMZ8);
cmph_config_set_graphsize(config, brz->c); cmph_config_set_graphsize(config, brz->c);
mphf_tmp = cmph_new(config); mphf_tmp = cmph_new(config);
if (mphf_tmp == NULL) if (mphf_tmp == NULL)
{ {
if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k); if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k);
error = 1; error = 1;
@ -453,9 +454,9 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_io_byte_vector_adapter_destroy(source); cmph_io_byte_vector_adapter_destroy(source);
break; break;
} }
if(mph->verbosity) if(mph->verbosity)
{ {
if (cur_bucket % 1000 == 0) if (cur_bucket % 1000 == 0)
{ {
fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k); fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k);
} }
@ -465,7 +466,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
case CMPH_FCH: case CMPH_FCH:
{ {
fch_data_t * fchf = NULL; fch_data_t * fchf = NULL;
fchf = (fch_data_t *)mphf_tmp->data; fchf = (fch_data_t *)mphf_tmp->data;
bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf); bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf);
} }
break; break;
@ -516,7 +517,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
{ {
cmph_uint32 i = 0; cmph_uint32 i = 0;
cmph_uint32 buflenh1 = 0; cmph_uint32 buflenh1 = 0;
cmph_uint32 buflenh2 = 0; cmph_uint32 buflenh2 = 0;
char * bufh1 = NULL; char * bufh1 = NULL;
char * bufh2 = NULL; char * bufh2 = NULL;
char * buf = NULL; char * buf = NULL;
@ -528,7 +529,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
memcpy(buf, &buflenh1, sizeof(cmph_uint32)); memcpy(buf, &buflenh1, sizeof(cmph_uint32));
memcpy(buf+sizeof(cmph_uint32), bufh1, (size_t)buflenh1); memcpy(buf+sizeof(cmph_uint32), bufh1, (size_t)buflenh1);
memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32)); memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32));
memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2); memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2);
for (i = 0; i < n; i++) memcpy(buf+2*sizeof(cmph_uint32)+buflenh1+buflenh2+i,(fchf->g + i), (size_t)1); for (i = 0; i < n; i++) memcpy(buf+2*sizeof(cmph_uint32)+buflenh1+buflenh2+i,(fchf->g + i), (size_t)1);
free(bufh1); free(bufh1);
free(bufh2); free(bufh2);
@ -537,7 +538,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen) static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
{ {
cmph_uint32 buflenh1 = 0; cmph_uint32 buflenh1 = 0;
cmph_uint32 buflenh2 = 0; cmph_uint32 buflenh2 = 0;
char * bufh1 = NULL; char * bufh1 = NULL;
char * bufh2 = NULL; char * bufh2 = NULL;
char * buf = NULL; char * buf = NULL;
@ -572,7 +573,7 @@ int brz_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd); nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
free(buf); free(buf);
// Dumping m and the vector offset. // Dumping m and the vector offset.
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->offset, sizeof(cmph_uint32)*(data->k), (size_t)1, fd); nbytes = fwrite(data->offset, sizeof(cmph_uint32)*(data->k), (size_t)1, fd);
return 1; return 1;
} }
@ -591,7 +592,7 @@ void brz_load(FILE *f, cmph_t *mphf)
nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo. nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo.
nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f);
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k); brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f); nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k); brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k); brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->g = (cmph_uint8 **) calloc((size_t)brz->k, sizeof(cmph_uint8 *)); brz->g = (cmph_uint8 **) calloc((size_t)brz->k, sizeof(cmph_uint8 *));
@ -635,7 +636,7 @@ void brz_load(FILE *f, cmph_t *mphf)
brz->h0 = hash_state_load(buf, buflen); brz->h0 = hash_state_load(buf, buflen);
free(buf); free(buf);
//loading c, m, and the vector offset. //loading c, m, and the vector offset.
nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f);
brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k); brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k);
nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f); nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f);
@ -654,9 +655,9 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n; register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n; register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
register cmph_uint8 mphf_bucket; register cmph_uint8 mphf_bucket;
if (h1 == h2 && ++h2 >= n) h2 = 0; if (h1 == h2 && ++h2 >= n) h2 = 0;
mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]); mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]);
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0); DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m); DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m);
DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]); DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
@ -722,61 +723,61 @@ void brz_destroy(cmph_t *mphf)
/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf); /** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf * \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/ */
void brz_pack(cmph_t *mphf, void *packed_mphf) void brz_pack(cmph_t *mphf, void *packed_mphf)
{ {
brz_data_t *data = (brz_data_t *)mphf->data; brz_data_t *data = (brz_data_t *)mphf->data;
cmph_uint8 * ptr = packed_mphf; cmph_uint8 * ptr = packed_mphf;
cmph_uint32 i,n; cmph_uint32 i,n;
// packing internal algo type // packing internal algo type
memcpy(ptr, &(data->algo), sizeof(data->algo)); memcpy(ptr, &(data->algo), sizeof(data->algo));
ptr += sizeof(data->algo); ptr += sizeof(data->algo);
// packing h0 type // packing h0 type
CMPH_HASH h0_type = hash_get_type(data->h0); CMPH_HASH h0_type = hash_get_type(data->h0);
memcpy(ptr, &h0_type, sizeof(h0_type)); memcpy(ptr, &h0_type, sizeof(h0_type));
ptr += sizeof(h0_type); ptr += sizeof(h0_type);
// packing h0 // packing h0
hash_state_pack(data->h0, ptr); hash_state_pack(data->h0, ptr);
ptr += hash_state_packed_size(h0_type); ptr += hash_state_packed_size(h0_type);
// packing k // packing k
memcpy(ptr, &(data->k), sizeof(data->k)); memcpy(ptr, &(data->k), sizeof(data->k));
ptr += sizeof(data->k); ptr += sizeof(data->k);
// packing c // packing c
*((cmph_uint64 *)ptr) = (cmph_uint64)data->c; *((cmph_uint64 *)ptr) = (cmph_uint64)data->c;
ptr += sizeof(data->c); ptr += sizeof(data->c);
// packing h1 type // packing h1 type
CMPH_HASH h1_type = hash_get_type(data->h1[0]); CMPH_HASH h1_type = hash_get_type(data->h1[0]);
memcpy(ptr, &h1_type, sizeof(h1_type)); memcpy(ptr, &h1_type, sizeof(h1_type));
ptr += sizeof(h1_type); ptr += sizeof(h1_type);
// packing h2 type // packing h2 type
CMPH_HASH h2_type = hash_get_type(data->h2[0]); CMPH_HASH h2_type = hash_get_type(data->h2[0]);
memcpy(ptr, &h2_type, sizeof(h2_type)); memcpy(ptr, &h2_type, sizeof(h2_type));
ptr += sizeof(h2_type); ptr += sizeof(h2_type);
// packing size // packing size
memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k); memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
ptr += data->k; ptr += data->k;
// packing offset // packing offset
memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k); memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
ptr += sizeof(cmph_uint32)*data->k; ptr += sizeof(cmph_uint32)*data->k;
#if defined (__ia64) || defined (__x86_64__) #if defined (__ia64) || defined (__x86_64__)
cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr; cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr;
#else #else
cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr; cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr;
#endif #endif
cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k); cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k);
for(i = 0; i < data->k; i++) for(i = 0; i < data->k; i++)
{ {
#if defined (__ia64) || defined (__x86_64__) #if defined (__ia64) || defined (__x86_64__)
@ -787,7 +788,7 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
// packing h1[i] // packing h1[i]
hash_state_pack(data->h1[i], g_i); hash_state_pack(data->h1[i], g_i);
g_i += hash_state_packed_size(h1_type); g_i += hash_state_packed_size(h1_type);
// packing h2[i] // packing h2[i]
hash_state_pack(data->h2[i], g_i); hash_state_pack(data->h2[i], g_i);
g_i += hash_state_packed_size(h2_type); g_i += hash_state_packed_size(h2_type);
@ -803,9 +804,9 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
break; break;
default: assert(0); default: assert(0);
} }
memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n); memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
g_i += n; g_i += n;
} }
} }
@ -814,16 +815,16 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 brz_packed_size(cmph_t *mphf) cmph_uint32 brz_packed_size(cmph_t *mphf)
{ {
cmph_uint32 i; cmph_uint32 i;
cmph_uint32 size = 0; cmph_uint32 size = 0;
brz_data_t *data = (brz_data_t *)mphf->data; brz_data_t *data = (brz_data_t *)mphf->data;
CMPH_HASH h0_type = hash_get_type(data->h0); CMPH_HASH h0_type = hash_get_type(data->h0);
CMPH_HASH h1_type = hash_get_type(data->h1[0]); CMPH_HASH h1_type = hash_get_type(data->h1[0]);
CMPH_HASH h2_type = hash_get_type(data->h2[0]); CMPH_HASH h2_type = hash_get_type(data->h2[0]);
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) + size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k); sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
// pointers to g_is // pointers to g_is
#if defined (__ia64) || defined (__x86_64__) #if defined (__ia64) || defined (__x86_64__)
@ -831,10 +832,10 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
#else #else
size += (cmph_uint32) sizeof(cmph_uint32)*data->k; size += (cmph_uint32) sizeof(cmph_uint32)*data->k;
#endif #endif
size += hash_state_packed_size(h1_type) * data->k; size += hash_state_packed_size(h1_type) * data->k;
size += hash_state_packed_size(h2_type) * data->k; size += hash_state_packed_size(h2_type) * data->k;
cmph_uint32 n = 0; cmph_uint32 n = 0;
for(i = 0; i < data->k; i++) for(i = 0; i < data->k; i++)
{ {
@ -848,7 +849,7 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
break; break;
default: assert(0); default: assert(0);
} }
size += n; size += n;
} }
return size; return size;
} }
@ -859,28 +860,28 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
{ {
register CMPH_HASH h0_type = *packed_mphf++; register CMPH_HASH h0_type = *packed_mphf++;
register cmph_uint32 *h0_ptr = packed_mphf; register cmph_uint32 *h0_ptr = packed_mphf;
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
register cmph_uint32 k = *packed_mphf++; register cmph_uint32 k = *packed_mphf++;
register double c = (double)(*((cmph_uint64*)packed_mphf)); register double c = (double)(*((cmph_uint64*)packed_mphf));
packed_mphf += 2; packed_mphf += 2;
register CMPH_HASH h1_type = *packed_mphf++; register CMPH_HASH h1_type = *packed_mphf++;
register CMPH_HASH h2_type = *packed_mphf++; register CMPH_HASH h2_type = *packed_mphf++;
register cmph_uint8 * size = (cmph_uint8 *) packed_mphf; register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
packed_mphf = (cmph_uint32 *)(size + k); packed_mphf = (cmph_uint32 *)(size + k);
register cmph_uint32 * offset = packed_mphf; register cmph_uint32 * offset = packed_mphf;
packed_mphf += k; packed_mphf += k;
register cmph_uint32 h0; register cmph_uint32 h0;
hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint); hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
h0 = fingerprint[2] % k; h0 = fingerprint[2] % k;
register cmph_uint32 m = size[h0]; register cmph_uint32 m = size[h0];
register cmph_uint32 n = (cmph_uint32)ceil(c * m); register cmph_uint32 n = (cmph_uint32)ceil(c * m);
@ -889,69 +890,69 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
#else #else
register cmph_uint32 * g_is_ptr = packed_mphf; register cmph_uint32 * g_is_ptr = packed_mphf;
#endif #endif
register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0]; register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type); register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
register cmph_uint8 mphf_bucket; register cmph_uint8 mphf_bucket;
if (h1 == h2 && ++h2 >= n) h2 = 0; if (h1 == h2 && ++h2 >= n) h2 = 0;
mphf_bucket = (cmph_uint8)(g[h1] + g[h2]); mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0); DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
DEBUGP("Address: %u\n", mphf_bucket + offset[h0]); DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
return (mphf_bucket + offset[h0]); return (mphf_bucket + offset[h0]);
} }
static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{ {
register CMPH_HASH h0_type = *packed_mphf++; register CMPH_HASH h0_type = *packed_mphf++;
register cmph_uint32 *h0_ptr = packed_mphf; register cmph_uint32 *h0_ptr = packed_mphf;
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
register cmph_uint32 k = *packed_mphf++; register cmph_uint32 k = *packed_mphf++;
register double c = (double)(*((cmph_uint64*)packed_mphf)); register double c = (double)(*((cmph_uint64*)packed_mphf));
packed_mphf += 2; packed_mphf += 2;
register CMPH_HASH h1_type = *packed_mphf++; register CMPH_HASH h1_type = *packed_mphf++;
register CMPH_HASH h2_type = *packed_mphf++; register CMPH_HASH h2_type = *packed_mphf++;
register cmph_uint8 * size = (cmph_uint8 *) packed_mphf; register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
packed_mphf = (cmph_uint32 *)(size + k); packed_mphf = (cmph_uint32 *)(size + k);
register cmph_uint32 * offset = packed_mphf; register cmph_uint32 * offset = packed_mphf;
packed_mphf += k; packed_mphf += k;
register cmph_uint32 h0; register cmph_uint32 h0;
hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint); hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
h0 = fingerprint[2] % k; h0 = fingerprint[2] % k;
register cmph_uint32 m = size[h0]; register cmph_uint32 m = size[h0];
register cmph_uint32 b = fch_calc_b(c, m); register cmph_uint32 b = fch_calc_b(c, m);
register double p1 = fch_calc_p1(m); register double p1 = fch_calc_p1(m);
register double p2 = fch_calc_p2(b); register double p2 = fch_calc_p2(b);
#if defined (__ia64) || defined (__x86_64__) #if defined (__ia64) || defined (__x86_64__)
register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf; register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf;
#else #else
register cmph_uint32 * g_is_ptr = packed_mphf; register cmph_uint32 * g_is_ptr = packed_mphf;
#endif #endif
register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0]; register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type); register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
@ -962,7 +963,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k
} }
/** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); /** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key legth in bytes * \param keylen key legth in bytes
@ -970,7 +971,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k
*/ */
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{ {
register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf; register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
register CMPH_ALGO algo = *ptr++; register CMPH_ALGO algo = *ptr++;
cmph_uint32 fingerprint[3]; cmph_uint32 fingerprint[3];
switch(algo) switch(algo)
@ -982,4 +983,3 @@ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
default: assert(0); default: assert(0);
} }
} }

View File

@ -17,7 +17,7 @@ struct __buffer_entry_t
buffer_entry_t * buffer_entry_new(cmph_uint32 capacity) buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
{ {
buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t)); buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t));
assert(buff_entry); if (!buff_entry) return NULL;
buff_entry->fd = NULL; buff_entry->fd = NULL;
buff_entry->buff = NULL; buff_entry->buff = NULL;
buff_entry->capacity = capacity; buff_entry->capacity = capacity;
@ -62,7 +62,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
free(buf); free(buf);
return NULL; return NULL;
} }
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
{ {
copied_bytes = buffer_entry->nbytes - buffer_entry->pos; copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes; lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
@ -71,7 +71,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
} }
memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes); memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
buffer_entry->pos += lacked_bytes; buffer_entry->pos += lacked_bytes;
lacked_bytes = *keylen; lacked_bytes = *keylen;
copied_bytes = 0; copied_bytes = 0;
buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen)); buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
@ -83,7 +83,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes); memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
} }
buffer_entry_load(buffer_entry); buffer_entry_load(buffer_entry);
} }
memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes); memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
buffer_entry->pos += lacked_bytes; buffer_entry->pos += lacked_bytes;
return buf; return buf;
@ -97,7 +97,7 @@ void buffer_entry_destroy(buffer_entry_t * buffer_entry)
buffer_entry->buff = NULL; buffer_entry->buff = NULL;
buffer_entry->capacity = 0; buffer_entry->capacity = 0;
buffer_entry->nbytes = 0; buffer_entry->nbytes = 0;
buffer_entry->pos = 0; buffer_entry->pos = 0;
buffer_entry->eof = 0; buffer_entry->eof = 0;
free(buffer_entry); free(buffer_entry);
} }

View File

@ -16,7 +16,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
{ {
cmph_uint32 memory_avail_entry, i; cmph_uint32 memory_avail_entry, i;
buffer_manage_t *buff_manage = (buffer_manage_t *)malloc(sizeof(buffer_manage_t)); buffer_manage_t *buff_manage = (buffer_manage_t *)malloc(sizeof(buffer_manage_t));
assert(buff_manage); if (!buff_manage) return NULL;
buff_manage->memory_avail = memory_avail; buff_manage->memory_avail = memory_avail;
buff_manage->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *)); buff_manage->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
buff_manage->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32)); buff_manage->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
@ -26,7 +26,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
for(i = 0; i < buff_manage->nentries; i++) for(i = 0; i < buff_manage->nentries; i++)
{ {
buff_manage->buffer_entries[i] = buffer_entry_new(memory_avail_entry); buff_manage->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
} }
return buff_manage; return buff_manage;
} }
@ -54,7 +54,7 @@ cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32
} }
void buffer_manage_destroy(buffer_manage_t * buffer_manage) void buffer_manage_destroy(buffer_manage_t * buffer_manage)
{ {
cmph_uint32 i; cmph_uint32 i;
for(i = 0; i < buffer_manage->nentries; i++) for(i = 0; i < buffer_manage->nentries; i++)
{ {

View File

@ -16,7 +16,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
{ {
cmph_uint32 memory_avail_entry, i; cmph_uint32 memory_avail_entry, i;
buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t)); buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t));
assert(buff_manager); if (!buff_manager) return NULL;
buff_manager->memory_avail = memory_avail; buff_manager->memory_avail = memory_avail;
buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *)); buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32)); buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
@ -26,7 +26,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
for(i = 0; i < buff_manager->nentries; i++) for(i = 0; i < buff_manager->nentries; i++)
{ {
buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry); buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
} }
return buff_manager; return buff_manager;
} }
@ -52,7 +52,7 @@ cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uin
} }
void buffer_manager_destroy(buffer_manager_t * buffer_manager) void buffer_manager_destroy(buffer_manager_t * buffer_manager)
{ {
cmph_uint32 i; cmph_uint32 i;
for(i = 0; i < buffer_manager->nentries; i++) for(i = 0; i < buffer_manager->nentries; i++)
{ {

View File

@ -18,7 +18,7 @@ chd_config_data_t *chd_config_new(cmph_config_t *mph)
cmph_io_adapter_t *key_source = mph->key_source; cmph_io_adapter_t *key_source = mph->key_source;
chd_config_data_t *chd; chd_config_data_t *chd;
chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t)); chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t));
assert(chd); if (!chd) return NULL;
memset(chd, 0, sizeof(chd_config_data_t)); memset(chd, 0, sizeof(chd_config_data_t));
chd->chd_ph = cmph_config_new(key_source); chd->chd_ph = cmph_config_new(key_source);
@ -69,12 +69,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
chd_config_data_t *chd = (chd_config_data_t *)mph->data; chd_config_data_t *chd = (chd_config_data_t *)mph->data;
chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data; chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
compressed_rank_t cr; compressed_rank_t cr;
register cmph_t * chd_phf = NULL; register cmph_t * chd_phf = NULL;
register cmph_uint32 packed_chd_phf_size = 0; register cmph_uint32 packed_chd_phf_size = 0;
cmph_uint8 * packed_chd_phf = NULL; cmph_uint8 * packed_chd_phf = NULL;
register cmph_uint32 packed_cr_size = 0; register cmph_uint32 packed_cr_size = 0;
cmph_uint8 * packed_cr = NULL; cmph_uint8 * packed_cr = NULL;
register cmph_uint32 i, idx, nkeys, nvals, nbins; register cmph_uint32 i, idx, nkeys, nvals, nbins;
@ -86,24 +86,24 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
ELAPSED_TIME_IN_SECONDS(&construction_time_begin); ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif #endif
cmph_config_set_verbosity(chd->chd_ph, mph->verbosity); cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
cmph_config_set_graphsize(chd->chd_ph, c); cmph_config_set_graphsize(chd->chd_ph, c);
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c); fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
} }
chd_phf = cmph_new(chd->chd_ph); chd_phf = cmph_new(chd->chd_ph);
if(chd_phf == NULL) if(chd_phf == NULL)
{ {
return NULL; return NULL;
} }
packed_chd_phf_size = cmph_packed_size(chd_phf); packed_chd_phf_size = cmph_packed_size(chd_phf);
DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size); DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
/* Make sure that we have enough space to pack the mphf. */ /* Make sure that we have enough space to pack the mphf. */
packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1); packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
@ -111,8 +111,8 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
cmph_pack(chd_phf, packed_chd_phf); cmph_pack(chd_phf, packed_chd_phf);
cmph_destroy(chd_phf); cmph_destroy(chd_phf);
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n"); fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
@ -121,11 +121,11 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
compressed_rank_init(&cr); compressed_rank_init(&cr);
nbins = chd_ph->n; nbins = chd_ph->n;
nkeys = chd_ph->m; nkeys = chd_ph->m;
nvals = nbins - nkeys; nvals = nbins - nkeys;
vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32)); vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
occup_table = (cmph_uint32 *)chd_ph->occup_table; occup_table = (cmph_uint32 *)chd_ph->occup_table;
for(i = 0, idx = 0; i < nbins; i++) for(i = 0, idx = 0; i < nbins; i++)
{ {
if(!GETBIT32(occup_table, i)) if(!GETBIT32(occup_table, i))
@ -133,10 +133,10 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
vals_table[idx++] = i; vals_table[idx++] = i;
} }
} }
compressed_rank_generate(&cr, vals_table, nvals); compressed_rank_generate(&cr, vals_table, nvals);
free(vals_table); free(vals_table);
packed_cr_size = compressed_rank_packed_size(&cr); packed_cr_size = compressed_rank_packed_size(&cr);
packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8)); packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
compressed_rank_pack(&cr, packed_cr); compressed_rank_pack(&cr, packed_cr);
@ -145,16 +145,16 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf = (cmph_t *)malloc(sizeof(cmph_t));
mphf->algo = mph->algo; mphf->algo = mph->algo;
chdf = (chd_data_t *)malloc(sizeof(chd_data_t)); chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
chdf->packed_cr = packed_cr; chdf->packed_cr = packed_cr;
packed_cr = NULL; //transfer memory ownership packed_cr = NULL; //transfer memory ownership
chdf->packed_chd_phf = packed_chd_phf; chdf->packed_chd_phf = packed_chd_phf;
packed_chd_phf = NULL; //transfer memory ownership packed_chd_phf = NULL; //transfer memory ownership
chdf->packed_chd_phf_size = packed_chd_phf_size; chdf->packed_chd_phf_size = packed_chd_phf_size;
chdf->packed_cr_size = packed_cr_size; chdf->packed_cr_size = packed_cr_size;
mphf->data = chdf; mphf->data = chdf;
mphf->size = nkeys; mphf->size = nkeys;
@ -163,12 +163,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
{ {
fprintf(stderr, "Successfully generated minimal perfect hash function\n"); fprintf(stderr, "Successfully generated minimal perfect hash function\n");
} }
#ifdef CMPH_TIMING #ifdef CMPH_TIMING
ELAPSED_TIME_IN_SECONDS(&construction_time); ELAPSED_TIME_IN_SECONDS(&construction_time);
register cmph_uint32 space_usage = chd_packed_size(mphf)*8; register cmph_uint32 space_usage = chd_packed_size(mphf)*8;
construction_time = construction_time - construction_time_begin; construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys); fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
#endif #endif
return mphf; return mphf;
} }
@ -196,7 +196,7 @@ int chd_dump(cmph_t *mphf, FILE *fd)
{ {
register size_t nbytes; register size_t nbytes;
chd_data_t *data = (chd_data_t *)mphf->data; chd_data_t *data = (chd_data_t *)mphf->data;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
// Dumping CHD_PH perfect hash function // Dumping CHD_PH perfect hash function
@ -207,7 +207,7 @@ int chd_dump(cmph_t *mphf, FILE *fd)
DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", 1); DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", 1);
nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd); nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd);
return 1; return 1;
} }
@ -242,10 +242,10 @@ void chd_pack(cmph_t *mphf, void *packed_mphf)
// packing packed_cr_size and packed_cr // packing packed_cr_size and packed_cr
*ptr = data->packed_cr_size; *ptr = data->packed_cr_size;
ptr8 = (cmph_uint8 *) (ptr + 1); ptr8 = (cmph_uint8 *) (ptr + 1);
memcpy(ptr8, data->packed_cr, data->packed_cr_size); memcpy(ptr8, data->packed_cr, data->packed_cr_size);
ptr8 += data->packed_cr_size; ptr8 += data->packed_cr_size;
ptr = (cmph_uint32 *) ptr8; ptr = (cmph_uint32 *) ptr8;
*ptr = data->packed_chd_phf_size; *ptr = data->packed_chd_phf_size;
@ -268,5 +268,3 @@ cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32); register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32);
return _chd_search(packed_chd_phf, ptr, key, keylen); return _chd_search(packed_chd_phf, ptr, key, keylen);
} }

View File

@ -29,7 +29,7 @@ struct _chd_ph_item_t
}; };
typedef struct _chd_ph_item_t chd_ph_item_t; typedef struct _chd_ph_item_t chd_ph_item_t;
// struct to represent the items at mapping phase only. // struct to represent the items at mapping phase only.
struct _chd_ph_map_item_t struct _chd_ph_map_item_t
{ {
cmph_uint32 f; cmph_uint32 f;
@ -85,7 +85,7 @@ static cmph_uint8 chd_ph_bucket_insert(chd_ph_bucket_t * buckets,chd_ph_map_item
register chd_ph_map_item_t * tmp_map_item = map_items + item_idx; register chd_ph_map_item_t * tmp_map_item = map_items + item_idx;
register chd_ph_bucket_t * bucket = buckets + tmp_map_item->bucket_num; register chd_ph_bucket_t * bucket = buckets + tmp_map_item->bucket_num;
tmp_item = items + bucket->items_list; tmp_item = items + bucket->items_list;
for(i = 0; i < bucket->size; i++) for(i = 0; i < bucket->size; i++)
{ {
if(tmp_item->f == tmp_map_item->f && tmp_item->h == tmp_map_item->h) if(tmp_item->f == tmp_map_item->f && tmp_item->h == tmp_map_item->h)
@ -105,7 +105,7 @@ void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets)
free(buckets); free(buckets);
} }
static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
cmph_uint32 *max_bucket_size); cmph_uint32 *max_bucket_size);
static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items, static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items,
@ -131,7 +131,7 @@ static inline double chd_ph_get_entropy(cmph_uint32 * disp_table, cmph_uint32 n,
{ {
probe_counts[disp_table[i]]++; probe_counts[disp_table[i]]++;
}; };
for(i = 0; i < max_probes; i++) for(i = 0; i < max_probes; i++)
{ {
if(probe_counts[i] > 0) if(probe_counts[i] > 0)
@ -145,9 +145,9 @@ chd_ph_config_data_t *chd_ph_config_new(void)
{ {
chd_ph_config_data_t *chd_ph; chd_ph_config_data_t *chd_ph;
chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t)); chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t));
assert(chd_ph); if (!chd_ph) return NULL;
memset(chd_ph, 0, sizeof(chd_ph_config_data_t)); memset(chd_ph, 0, sizeof(chd_ph_config_data_t));
chd_ph->hashfunc = CMPH_HASH_JENKINS; chd_ph->hashfunc = CMPH_HASH_JENKINS;
chd_ph->cs = NULL; chd_ph->cs = NULL;
chd_ph->nbuckets = 0; chd_ph->nbuckets = 0;
@ -159,7 +159,7 @@ chd_ph_config_data_t *chd_ph_config_new(void)
chd_ph->keys_per_bin = 1; chd_ph->keys_per_bin = 1;
chd_ph->keys_per_bucket = 4; chd_ph->keys_per_bucket = 4;
chd_ph->occup_table = 0; chd_ph->occup_table = 0;
return chd_ph; return chd_ph;
} }
@ -184,7 +184,7 @@ void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 1) break; //chd_ph only uses one linear hash function if (i >= 1) break; //chd_ph only uses one linear hash function
chd_ph->hashfunc = *hashptr; chd_ph->hashfunc = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -228,24 +228,24 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_
{ {
mapping_iterations--; mapping_iterations--;
if (chd_ph->hl) hash_state_destroy(chd_ph->hl); if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m); chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);
chd_ph_bucket_clean(buckets, chd_ph->nbuckets); chd_ph_bucket_clean(buckets, chd_ph->nbuckets);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for(i = 0; i < chd_ph->m; i++) for(i = 0; i < chd_ph->m; i++)
{ {
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
hash_vector(chd_ph->hl, key, keylen, hl); hash_vector(chd_ph->hl, key, keylen, hl);
map_item = (map_items + i); map_item = (map_items + i);
g = hl[0] % chd_ph->nbuckets; g = hl[0] % chd_ph->nbuckets;
map_item->f = hl[1] % chd_ph->n; map_item->f = hl[1] % chd_ph->n;
map_item->h = hl[2] % (chd_ph->n - 1) + 1; map_item->h = hl[2] % (chd_ph->n - 1) + 1;
map_item->bucket_num=g; map_item->bucket_num=g;
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
// if(buckets[g].size == (chd_ph->keys_per_bucket << 2)) // if(buckets[g].size == (chd_ph->keys_per_bucket << 2))
// { // {
// DEBUGP("BUCKET = %u -- SIZE = %u -- MAXIMUM SIZE = %u\n", g, buckets[g].size, (chd_ph->keys_per_bucket << 2)); // DEBUGP("BUCKET = %u -- SIZE = %u -- MAXIMUM SIZE = %u\n", g, buckets[g].size, (chd_ph->keys_per_bucket << 2));
@ -275,7 +275,7 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_
free(map_items); free(map_items);
return 1; // SUCCESS return 1; // SUCCESS
} }
if(mapping_iterations == 0) if(mapping_iterations == 0)
{ {
goto error; goto error;
@ -292,7 +292,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size) cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size)
{ {
chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(max_bucket_size + 1, sizeof(chd_ph_sorted_list_t)); chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(max_bucket_size + 1, sizeof(chd_ph_sorted_list_t));
chd_ph_bucket_t * input_buckets = (*_buckets); chd_ph_bucket_t * input_buckets = (*_buckets);
chd_ph_bucket_t * output_buckets; chd_ph_bucket_t * output_buckets;
chd_ph_item_t * input_items = (*_items); chd_ph_item_t * input_items = (*_items);
@ -319,7 +319,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
// Store the buckets in a new array which is sorted by bucket sizes // Store the buckets in a new array which is sorted by bucket sizes
output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero
// non_empty_buckets = nbuckets; // non_empty_buckets = nbuckets;
for(i = 0; i < nbuckets; i++) for(i = 0; i < nbuckets; i++)
{ {
bucket_size = input_buckets[i].size; bucket_size = input_buckets[i].size;
@ -338,8 +338,8 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
// Return the buckets sorted in new order and free the old buckets sorted in old order // Return the buckets sorted in new order and free the old buckets sorted in old order
free(input_buckets); free(input_buckets);
(*_buckets) = output_buckets; (*_buckets) = output_buckets;
// Store the items according to the new order of buckets. // Store the items according to the new order of buckets.
output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t)); output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t));
position = 0; position = 0;
@ -426,26 +426,26 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph
} }
position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n); position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position); UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
// ([position/32]^=(1<<(position%32)); // ([position/32]^=(1<<(position%32));
item++; item++;
i--; i--;
}; };
}; };
return 0; return 0;
} }
return 1; return 1;
}; };
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes, static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size) cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size)
{ {
register cmph_uint32 probe0_num, probe1_num, probe_num; register cmph_uint32 probe0_num, probe1_num, probe_num;
probe0_num = 0; probe0_num = 0;
probe1_num = 0; probe1_num = 0;
probe_num = 0; probe_num = 0;
while(1) while(1)
{ {
if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, bucket_num,size)) if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, bucket_num,size))
@ -469,7 +469,7 @@ static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucke
}; };
static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items, static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 * disp_table) cmph_uint32 * disp_table)
{ {
register cmph_uint32 i = 0; register cmph_uint32 i = 0;
@ -490,8 +490,8 @@ static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_buc
return 1; return 1;
}; };
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 * disp_table) cmph_uint32 * disp_table)
{ {
register cmph_uint32 i,j, non_placed_bucket; register cmph_uint32 i,j, non_placed_bucket;
@ -516,10 +516,10 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
{ {
// if bucket is successfully placed remove it from list // if bucket is successfully placed remove it from list
if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i)) if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i))
{ {
disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n; disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n;
// DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]); // DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]);
} }
else else
{ {
// DEBUGP("BUCKET %u NOT PLACED\n", curr_bucket); // DEBUGP("BUCKET %u NOT PLACED\n", curr_bucket);
@ -529,7 +529,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
#endif #endif
buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list; buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list;
buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id; buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id;
#ifdef DEBUG #ifdef DEBUG
buckets[curr_bucket].items_list=items_list; buckets[curr_bucket].items_list=items_list;
buckets[curr_bucket].bucket_id=bucket_id; buckets[curr_bucket].bucket_id=bucket_id;
#endif #endif
@ -557,7 +557,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
}; };
cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items , cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 * disp_table) cmph_uint32 * disp_table)
{ {
if(chd_ph->use_h) if(chd_ph->use_h)
@ -582,7 +582,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
memset(chd_ph->occup_table, 0, chd_ph->n); memset(chd_ph->occup_table, 0, chd_ph->n);
else else
memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32)); memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++) for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size + for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size +
sorted_lists[bucket_size].buckets_list; i++) sorted_lists[bucket_size].buckets_list; i++)
@ -602,7 +602,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
return 0; return 0;
} }
(chd_ph->occup_table[position])++; (chd_ph->occup_table[position])++;
} }
else else
{ {
if(GETBIT32(((cmph_uint32*)chd_ph->occup_table), position)) if(GETBIT32(((cmph_uint32*)chd_ph->occup_table), position))
@ -624,7 +624,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
cmph_t *mphf = NULL; cmph_t *mphf = NULL;
chd_ph_data_t *chd_phf = NULL; chd_ph_data_t *chd_phf = NULL;
chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data; chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
register double load_factor = c; register double load_factor = c;
register cmph_uint8 searching_success = 0; register cmph_uint8 searching_success = 0;
register cmph_uint32 max_probes = 1 << 20; // default value for max_probes register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
@ -645,24 +645,24 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
chd_ph->m = mph->key_source->nkeys; chd_ph->m = mph->key_source->nkeys;
DEBUGP("m = %u\n", chd_ph->m); DEBUGP("m = %u\n", chd_ph->m);
chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1; chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
DEBUGP("nbuckets = %u\n", chd_ph->nbuckets); DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);
if(load_factor < 0.5 ) if(load_factor < 0.5 )
{ {
load_factor = 0.5; load_factor = 0.5;
} }
if(load_factor >= 0.99) if(load_factor >= 0.99)
{ {
load_factor = 0.99; load_factor = 0.99;
} }
DEBUGP("load_factor = %.3f\n", load_factor); DEBUGP("load_factor = %.3f\n", load_factor);
chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1; chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;
//Round the number of bins to the prime immediately above //Round the number of bins to the prime immediately above
if(chd_ph->n % 2 == 0) chd_ph->n++; if(chd_ph->n % 2 == 0) chd_ph->n++;
for(;;) for(;;)
@ -670,35 +670,35 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
if(check_primality(chd_ph->n) == 1) if(check_primality(chd_ph->n) == 1)
break; break;
chd_ph->n += 2; // just odd numbers can be primes for n > 2 chd_ph->n += 2; // just odd numbers can be primes for n > 2
}; };
DEBUGP("n = %u \n", chd_ph->n); DEBUGP("n = %u \n", chd_ph->n);
if(chd_ph->keys_per_bin == 1) if(chd_ph->keys_per_bin == 1)
{ {
space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n); space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
} }
if(mph->verbosity) if(mph->verbosity)
{ {
fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound); fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
} }
// We allocate the working tables // We allocate the working tables
buckets = chd_ph_bucket_new(chd_ph->nbuckets); buckets = chd_ph_bucket_new(chd_ph->nbuckets);
items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t)); items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes); max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
if(chd_ph->keys_per_bin == 1) if(chd_ph->keys_per_bin == 1)
chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32)); chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
else else
chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8)); chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32)); disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
// //
// init_genrand(time(0)); // init_genrand(time(0));
while(1) while(1)
{ {
iterations --; iterations --;
@ -706,12 +706,12 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{ {
fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n); fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
} }
if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size)) if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
{ {
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Failure in mapping step\n"); fprintf(stderr, "Failure in mapping step\n");
} }
failure = 1; failure = 1;
goto cleanup; goto cleanup;
@ -727,15 +727,15 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
} }
sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size); sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Starting searching step\n"); fprintf(stderr, "Starting searching step\n");
} }
searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table); searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
if(searching_success) break; if(searching_success) break;
// reset occup_table // reset occup_table
if(chd_ph->keys_per_bin > 1) if(chd_ph->keys_per_bin > 1)
memset(chd_ph->occup_table, 0, chd_ph->n); memset(chd_ph->occup_table, 0, chd_ph->n);
@ -757,19 +757,19 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{ {
if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size)) if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
{ {
DEBUGP("Error for bin packing generation"); DEBUGP("Error for bin packing generation");
failure = 1; failure = 1;
goto cleanup; goto cleanup;
} }
} }
#endif #endif
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Starting compressing step\n"); fprintf(stderr, "Starting compressing step\n");
} }
if(chd_ph->cs) if(chd_ph->cs)
{ {
free(chd_ph->cs); free(chd_ph->cs);
@ -777,7 +777,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t)); chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
compressed_seq_init(chd_ph->cs); compressed_seq_init(chd_ph->cs);
compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets); compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
#ifdef CMPH_TIMING #ifdef CMPH_TIMING
ELAPSED_TIME_IN_SECONDS(&construction_time); ELAPSED_TIME_IN_SECONDS(&construction_time);
register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes); register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
@ -785,11 +785,11 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
#endif #endif
cleanup: cleanup:
chd_ph_bucket_destroy(buckets); chd_ph_bucket_destroy(buckets);
free(items); free(items);
free(sorted_lists); free(sorted_lists);
free(disp_table); free(disp_table);
if(failure) if(failure)
{ {
if(chd_ph->hl) if(chd_ph->hl)
{ {
@ -802,14 +802,14 @@ cleanup:
mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf = (cmph_t *)malloc(sizeof(cmph_t));
mphf->algo = mph->algo; mphf->algo = mph->algo;
chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t)); chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
chd_phf->cs = chd_ph->cs; chd_phf->cs = chd_ph->cs;
chd_ph->cs = NULL; //transfer memory ownership chd_ph->cs = NULL; //transfer memory ownership
chd_phf->hl = chd_ph->hl; chd_phf->hl = chd_ph->hl;
chd_ph->hl = NULL; //transfer memory ownership chd_ph->hl = NULL; //transfer memory ownership
chd_phf->n = chd_ph->n; chd_phf->n = chd_ph->n;
chd_phf->nbuckets = chd_ph->nbuckets; chd_phf->nbuckets = chd_ph->nbuckets;
mphf->data = chd_phf; mphf->data = chd_phf;
mphf->size = chd_ph->n; mphf->size = chd_ph->n;
@ -818,12 +818,12 @@ cleanup:
{ {
fprintf(stderr, "Successfully generated minimal perfect hash function\n"); fprintf(stderr, "Successfully generated minimal perfect hash function\n");
} }
#ifdef CMPH_TIMING #ifdef CMPH_TIMING
register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8; register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
construction_time = construction_time - construction_time_begin; construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m); fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
#endif #endif
return mphf; return mphf;
} }
@ -846,19 +846,19 @@ void chd_ph_load(FILE *fd, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, fd); nbytes = fread(buf, (size_t)buflen, (size_t)1, fd);
chd_ph->hl = hash_state_load(buf, buflen); chd_ph->hl = hash_state_load(buf, buflen);
free(buf); free(buf);
nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd); nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
DEBUGP("Compressed sequence structure has %u bytes\n", buflen); DEBUGP("Compressed sequence structure has %u bytes\n", buflen);
buf = (char *)malloc((size_t)buflen); buf = (char *)malloc((size_t)buflen);
nbytes = fread(buf, (size_t)buflen, (size_t)1, fd); nbytes = fread(buf, (size_t)buflen, (size_t)1, fd);
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t)); chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
compressed_seq_load(chd_ph->cs, buf, buflen); compressed_seq_load(chd_ph->cs, buf, buflen);
free(buf); free(buf);
// loading n and nbuckets // loading n and nbuckets
DEBUGP("Reading n and nbuckets\n"); DEBUGP("Reading n and nbuckets\n");
nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd);
} }
int chd_ph_dump(cmph_t *mphf, FILE *fd) int chd_ph_dump(cmph_t *mphf, FILE *fd)
@ -867,7 +867,7 @@ int chd_ph_dump(cmph_t *mphf, FILE *fd)
cmph_uint32 buflen; cmph_uint32 buflen;
register size_t nbytes; register size_t nbytes;
chd_ph_data_t *data = (chd_ph_data_t *)mphf->data; chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
hash_state_dump(data->hl, &buf, &buflen); hash_state_dump(data->hl, &buf, &buflen);
@ -906,11 +906,11 @@ cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
register cmph_uint32 disp,position; register cmph_uint32 disp,position;
register cmph_uint32 probe0_num,probe1_num; register cmph_uint32 probe0_num,probe1_num;
register cmph_uint32 f,g,h; register cmph_uint32 f,g,h;
hash_vector(chd_ph->hl, key, keylen, hl); hash_vector(chd_ph->hl, key, keylen, hl);
g = hl[0] % chd_ph->nbuckets; g = hl[0] % chd_ph->nbuckets;
f = hl[1] % chd_ph->n; f = hl[1] % chd_ph->n;
h = hl[2] % (chd_ph->n-1) + 1; h = hl[2] % (chd_ph->n-1) + 1;
disp = compressed_seq_query(chd_ph->cs, g); disp = compressed_seq_query(chd_ph->cs, g);
probe0_num = disp % chd_ph->n; probe0_num = disp % chd_ph->n;
probe1_num = disp/chd_ph->n; probe1_num = disp/chd_ph->n;
@ -949,10 +949,10 @@ void chd_ph_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 chd_ph_packed_size(cmph_t *mphf) cmph_uint32 chd_ph_packed_size(cmph_t *mphf)
{ {
register chd_ph_data_t *data = (chd_ph_data_t *)mphf->data; register chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
register CMPH_HASH hl_type = hash_get_type(data->hl); register CMPH_HASH hl_type = hash_get_type(data->hl);
register cmph_uint32 hash_state_pack_size = hash_state_packed_size(hl_type); register cmph_uint32 hash_state_pack_size = hash_state_packed_size(hl_type);
register cmph_uint32 cs_pack_size = compressed_seq_packed_size(data->cs); register cmph_uint32 cs_pack_size = compressed_seq_packed_size(data->cs);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_pack_size + cs_pack_size + 3*sizeof(cmph_uint32)); return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_pack_size + cs_pack_size + 3*sizeof(cmph_uint32));
} }
@ -961,28 +961,25 @@ cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
{ {
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
register cmph_uint32 * ptr = (cmph_uint32 *)(hl_ptr + hash_state_packed_size(hl_type)); register cmph_uint32 * ptr = (cmph_uint32 *)(hl_ptr + hash_state_packed_size(hl_type));
register cmph_uint32 n = *ptr++; register cmph_uint32 n = *ptr++;
register cmph_uint32 nbuckets = *ptr++; register cmph_uint32 nbuckets = *ptr++;
cmph_uint32 hl[3]; cmph_uint32 hl[3];
register cmph_uint32 disp,position; register cmph_uint32 disp,position;
register cmph_uint32 probe0_num,probe1_num; register cmph_uint32 probe0_num,probe1_num;
register cmph_uint32 f,g,h; register cmph_uint32 f,g,h;
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl); hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
g = hl[0] % nbuckets; g = hl[0] % nbuckets;
f = hl[1] % n; f = hl[1] % n;
h = hl[2] % (n-1) + 1; h = hl[2] % (n-1) + 1;
disp = compressed_seq_query_packed(ptr, g); disp = compressed_seq_query_packed(ptr, g);
probe0_num = disp % n; probe0_num = disp % n;
probe1_num = disp/n; probe1_num = disp/n;
position = (cmph_uint32)((f + ((cmph_uint64 )h)*probe0_num + probe1_num) % n); position = (cmph_uint32)((f + ((cmph_uint64 )h)*probe0_num + probe1_num) % n);
return position; return position;
} }

View File

@ -21,7 +21,7 @@ chm_config_data_t *chm_config_new(void)
{ {
chm_config_data_t *chm = NULL; chm_config_data_t *chm = NULL;
chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t)); chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
assert(chm); if (!chm) return NULL;
memset(chm, 0, sizeof(chm_config_data_t)); memset(chm, 0, sizeof(chm_config_data_t));
chm->hashfuncs[0] = CMPH_HASH_JENKINS; chm->hashfuncs[0] = CMPH_HASH_JENKINS;
chm->hashfuncs[1] = CMPH_HASH_JENKINS; chm->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -45,7 +45,7 @@ void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 2) break; //chm only uses two hash functions if (i >= 2) break; //chm only uses two hash functions
chm->hashfuncs[i] = *hashptr; chm->hashfuncs[i] = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -61,7 +61,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
chm_config_data_t *chm = (chm_config_data_t *)mph->data; chm_config_data_t *chm = (chm_config_data_t *)mph->data;
chm->m = mph->key_source->nkeys; chm->m = mph->key_source->nkeys;
if (c == 0) c = 2.09; if (c == 0) c = 2.09;
chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys); chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
chm->graph = graph_new(chm->n, chm->m); chm->graph = graph_new(chm->n, chm->m);
DEBUGP("Created graph\n"); DEBUGP("Created graph\n");
@ -92,12 +92,12 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations); fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
graph_destroy(chm->graph); graph_destroy(chm->graph);
return NULL; return NULL;
} }
@ -120,7 +120,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
chm_traverse(chm, visited, i); chm_traverse(chm, visited, i);
} }
} }
graph_destroy(chm->graph); graph_destroy(chm->graph);
free(visited); free(visited);
chm->graph = NULL; chm->graph = NULL;
@ -149,7 +149,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3
graph_iterator_t it = graph_neighbors_it(chm->graph, v); graph_iterator_t it = graph_neighbors_it(chm->graph, v);
cmph_uint32 neighbor = 0; cmph_uint32 neighbor = 0;
SETBIT(visited,v); SETBIT(visited,v);
DEBUGP("Visiting vertex %u\n", v); DEBUGP("Visiting vertex %u\n", v);
while((neighbor = graph_next_neighbor(chm->graph, &it)) != GRAPH_NO_NEIGHBOR) while((neighbor = graph_next_neighbor(chm->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
@ -162,7 +162,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3
chm_traverse(chm, visited, neighbor); chm_traverse(chm, visited, neighbor);
} }
} }
static int chm_gen_edges(cmph_config_t *mph) static int chm_gen_edges(cmph_config_t *mph)
{ {
cmph_uint32 e; cmph_uint32 e;
@ -170,7 +170,7 @@ static int chm_gen_edges(cmph_config_t *mph)
int cycles = 0; int cycles = 0;
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]); DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
graph_clear_edges(chm->graph); graph_clear_edges(chm->graph);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e) for (e = 0; e < mph->key_source->nkeys; ++e)
{ {
@ -181,7 +181,7 @@ static int chm_gen_edges(cmph_config_t *mph)
h1 = hash(chm->hashes[0], key, keylen) % chm->n; h1 = hash(chm->hashes[0], key, keylen) % chm->n;
h2 = hash(chm->hashes[1], key, keylen) % chm->n; h2 = hash(chm->hashes[1], key, keylen) % chm->n;
if (h1 == h2) if (++h2 >= chm->n) h2 = 0; if (h1 == h2) if (++h2 >= chm->n) h2 = 0;
if (h1 == h2) if (h1 == h2)
{ {
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -205,7 +205,7 @@ int chm_dump(cmph_t *mphf, FILE *fd)
cmph_uint32 two = 2; //number of hash functions cmph_uint32 two = 2; //number of hash functions
chm_data_t *data = (chm_data_t *)mphf->data; chm_data_t *data = (chm_data_t *)mphf->data;
register size_t nbytes; register size_t nbytes;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
nbytes = fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd);
@ -223,7 +223,7 @@ int chm_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->g, sizeof(cmph_uint32)*data->n, (size_t)1, fd); nbytes = fwrite(data->g, sizeof(cmph_uint32)*data->n, (size_t)1, fd);
/* #ifdef DEBUG /* #ifdef DEBUG
fprintf(stderr, "G: "); fprintf(stderr, "G: ");
@ -260,8 +260,8 @@ void chm_load(FILE *f, cmph_t *mphf)
} }
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f);
chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n); chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n);
nbytes = fread(chm->g, chm->n*sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(chm->g, chm->n*sizeof(cmph_uint32), (size_t)1, f);
@ -272,7 +272,7 @@ void chm_load(FILE *f, cmph_t *mphf)
#endif #endif
return; return;
} }
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{ {
@ -287,7 +287,7 @@ cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void chm_destroy(cmph_t *mphf) void chm_destroy(cmph_t *mphf)
{ {
chm_data_t *data = (chm_data_t *)mphf->data; chm_data_t *data = (chm_data_t *)mphf->data;
free(data->g); free(data->g);
hash_state_destroy(data->hashes[0]); hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]); hash_state_destroy(data->hashes[1]);
free(data->hashes); free(data->hashes);
@ -298,7 +298,7 @@ void chm_destroy(cmph_t *mphf)
/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf); /** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed to by packed_mphf. * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed to by packed_mphf.
* \param mphf pointer to the resulting mphf * \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/ */
void chm_pack(cmph_t *mphf, void *packed_mphf) void chm_pack(cmph_t *mphf, void *packed_mphf)
{ {
@ -332,26 +332,26 @@ void chm_pack(cmph_t *mphf, void *packed_mphf)
ptr += sizeof(data->m); ptr += sizeof(data->m);
// packing g // packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
} }
/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf); /** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 chm_packed_size(cmph_t *mphf) cmph_uint32 chm_packed_size(cmph_t *mphf)
{ {
chm_data_t *data = (chm_data_t *)mphf->data; chm_data_t *data = (chm_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->hashes[0]); CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]); CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); 4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
} }
/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); /** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key length in bytes * \param keylen key length in bytes
@ -366,16 +366,16 @@ cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4; h2_ptr += 4;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 n = *g_ptr++; register cmph_uint32 n = *g_ptr++;
register cmph_uint32 m = *g_ptr++; register cmph_uint32 m = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
if (h1 == h2 && ++h2 >= n) h2 = 0; if (h1 == h2 && ++h2 >= n) h2 = 0;
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m); DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
return (g_ptr[h1] + g_ptr[h2]) % m; return (g_ptr[h1] + g_ptr[h2]) % m;
} }

View File

@ -1,10 +1,10 @@
#include "cmph.h" #include "cmph.h"
#include "cmph_structs.h" #include "cmph_structs.h"
#include "chm.h" #include "chm.h"
#include "bmz.h" #include "bmz.h"
#include "bmz8.h" #include "bmz8.h"
#include "brz.h" #include "brz.h"
#include "fch.h" #include "fch.h"
#include "bdz.h" #include "bdz.h"
#include "bdz_ph.h" #include "bdz_ph.h"
#include "chd_ph.h" #include "chd_ph.h"
@ -268,12 +268,12 @@ cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 stru
key_source->read = key_struct_vector_read; key_source->read = key_struct_vector_read;
key_source->dispose = key_vector_dispose; key_source->dispose = key_vector_dispose;
key_source->rewind = key_struct_vector_rewind; key_source->rewind = key_struct_vector_rewind;
return key_source; return key_source;
} }
void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source) void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{ {
cmph_io_struct_vector_destroy(key_source); cmph_io_struct_vector_destroy(key_source);
} }
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys) cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
@ -374,7 +374,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{ {
if (mph->algo == CMPH_BRZ) if (mph->algo == CMPH_BRZ)
{ {
brz_config_set_tmp_dir(mph, tmp_dir); brz_config_set_tmp_dir(mph, tmp_dir);
} }
@ -383,7 +383,7 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd) void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{ {
if (mph->algo == CMPH_BRZ) if (mph->algo == CMPH_BRZ)
{ {
brz_config_set_mphf_fd(mph, mphf_fd); brz_config_set_mphf_fd(mph, mphf_fd);
} }
@ -391,19 +391,19 @@ void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b) void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{ {
if (mph->algo == CMPH_BRZ) if (mph->algo == CMPH_BRZ)
{ {
brz_config_set_b(mph, b); brz_config_set_b(mph, b);
} }
else if (mph->algo == CMPH_BDZ) else if (mph->algo == CMPH_BDZ)
{ {
bdz_config_set_b(mph, b); bdz_config_set_b(mph, b);
} }
else if (mph->algo == CMPH_CHD_PH) else if (mph->algo == CMPH_CHD_PH)
{ {
chd_ph_config_set_b(mph, b); chd_ph_config_set_b(mph, b);
} }
else if (mph->algo == CMPH_CHD) else if (mph->algo == CMPH_CHD)
{ {
chd_config_set_b(mph, b); chd_config_set_b(mph, b);
} }
@ -411,11 +411,11 @@ void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin) void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{ {
if (mph->algo == CMPH_CHD_PH) if (mph->algo == CMPH_CHD_PH)
{ {
chd_ph_config_set_keys_per_bin(mph, keys_per_bin); chd_ph_config_set_keys_per_bin(mph, keys_per_bin);
} }
else if (mph->algo == CMPH_CHD) else if (mph->algo == CMPH_CHD)
{ {
chd_config_set_keys_per_bin(mph, keys_per_bin); chd_config_set_keys_per_bin(mph, keys_per_bin);
} }
@ -423,7 +423,7 @@ void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability) void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
{ {
if (mph->algo == CMPH_BRZ) if (mph->algo == CMPH_BRZ)
{ {
brz_config_set_memory_availability(mph, memory_availability); brz_config_set_memory_availability(mph, memory_availability);
} }
@ -523,7 +523,7 @@ cmph_t *cmph_new(cmph_config_t *mph)
double c = mph->c; double c = mph->c;
DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]); DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);
switch (mph->algo) switch (mph->algo)
{ {
case CMPH_CHM: case CMPH_CHM:
DEBUGP("Creating chm hash\n"); DEBUGP("Creating chm hash\n");
@ -658,28 +658,28 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
case CMPH_CHM: case CMPH_CHM:
return chm_search(mphf, key, keylen); return chm_search(mphf, key, keylen);
case CMPH_BMZ: /* included -- Fabiano */ case CMPH_BMZ: /* included -- Fabiano */
DEBUGP("bmz algorithm search\n"); DEBUGP("bmz algorithm search\n");
return bmz_search(mphf, key, keylen); return bmz_search(mphf, key, keylen);
case CMPH_BMZ8: /* included -- Fabiano */ case CMPH_BMZ8: /* included -- Fabiano */
DEBUGP("bmz8 algorithm search\n"); DEBUGP("bmz8 algorithm search\n");
return bmz8_search(mphf, key, keylen); return bmz8_search(mphf, key, keylen);
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
DEBUGP("brz algorithm search\n"); DEBUGP("brz algorithm search\n");
return brz_search(mphf, key, keylen); return brz_search(mphf, key, keylen);
case CMPH_FCH: /* included -- Fabiano */ case CMPH_FCH: /* included -- Fabiano */
DEBUGP("fch algorithm search\n"); DEBUGP("fch algorithm search\n");
return fch_search(mphf, key, keylen); return fch_search(mphf, key, keylen);
case CMPH_BDZ: /* included -- Fabiano */ case CMPH_BDZ: /* included -- Fabiano */
DEBUGP("bdz algorithm search\n"); DEBUGP("bdz algorithm search\n");
return bdz_search(mphf, key, keylen); return bdz_search(mphf, key, keylen);
case CMPH_BDZ_PH: /* included -- Fabiano */ case CMPH_BDZ_PH: /* included -- Fabiano */
DEBUGP("bdz_ph algorithm search\n"); DEBUGP("bdz_ph algorithm search\n");
return bdz_ph_search(mphf, key, keylen); return bdz_ph_search(mphf, key, keylen);
case CMPH_CHD_PH: /* included -- Fabiano */ case CMPH_CHD_PH: /* included -- Fabiano */
DEBUGP("chd_ph algorithm search\n"); DEBUGP("chd_ph algorithm search\n");
return chd_ph_search(mphf, key, keylen); return chd_ph_search(mphf, key, keylen);
case CMPH_CHD: /* included -- Fabiano */ case CMPH_CHD: /* included -- Fabiano */
DEBUGP("chd algorithm search\n"); DEBUGP("chd algorithm search\n");
return chd_search(mphf, key, keylen); return chd_search(mphf, key, keylen);
default: default:
assert(0); assert(0);
@ -692,7 +692,7 @@ cmph_uint32 cmph_size(cmph_t *mphf)
{ {
return mphf->size; return mphf->size;
} }
void cmph_destroy(cmph_t *mphf) void cmph_destroy(cmph_t *mphf)
{ {
switch(mphf->algo) switch(mphf->algo)
@ -724,7 +724,7 @@ void cmph_destroy(cmph_t *mphf)
case CMPH_CHD: /* included -- Fabiano */ case CMPH_CHD: /* included -- Fabiano */
chd_destroy(mphf); chd_destroy(mphf);
return; return;
default: default:
assert(0); assert(0);
} }
assert(0); assert(0);
@ -735,12 +735,12 @@ void cmph_destroy(cmph_t *mphf)
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf); /** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed to by packed_mphf. * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed to by packed_mphf.
* \param mphf pointer to the resulting mphf * \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/ */
void cmph_pack(cmph_t *mphf, void *packed_mphf) void cmph_pack(cmph_t *mphf, void *packed_mphf)
{ {
// packing algorithm type to be used in cmph.c // packing algorithm type to be used in cmph.c
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf; cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
*ptr++ = mphf->algo; *ptr++ = mphf->algo;
DEBUGP("mphf->algo = %u\n", mphf->algo); DEBUGP("mphf->algo = %u\n", mphf->algo);
switch(mphf->algo) switch(mphf->algo)
@ -772,7 +772,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
case CMPH_CHD: /* included -- Fabiano */ case CMPH_CHD: /* included -- Fabiano */
chd_pack(mphf, ptr); chd_pack(mphf, ptr);
break; break;
default: default:
assert(0); assert(0);
} }
return; return;
@ -782,7 +782,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 cmph_packed_size(cmph_t *mphf) cmph_uint32 cmph_packed_size(cmph_t *mphf)
{ {
switch(mphf->algo) switch(mphf->algo)
@ -805,14 +805,14 @@ cmph_uint32 cmph_packed_size(cmph_t *mphf)
return chd_ph_packed_size(mphf); return chd_ph_packed_size(mphf);
case CMPH_CHD: /* included -- Fabiano */ case CMPH_CHD: /* included -- Fabiano */
return chd_packed_size(mphf); return chd_packed_size(mphf);
default: default:
assert(0); assert(0);
} }
return 0; // FAILURE return 0; // FAILURE
} }
/** \fn cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); /** \fn cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key length in bytes * \param keylen key length in bytes
@ -842,7 +842,7 @@ cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 k
return chd_ph_search_packed(++ptr, key, keylen); return chd_ph_search_packed(++ptr, key, keylen);
case CMPH_CHD: /* included -- Fabiano */ case CMPH_CHD: /* included -- Fabiano */
return chd_search_packed(++ptr, key, keylen); return chd_search_packed(++ptr, key, keylen);
default: default:
assert(0); assert(0);
} }
return 0; // FAILURE return 0; // FAILURE

View File

@ -28,7 +28,7 @@ void __cmph_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd); nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd);
nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd); nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd);
} }
cmph_t *__cmph_load(FILE *f) cmph_t *__cmph_load(FILE *f)
{ {
cmph_t *mphf = NULL; cmph_t *mphf = NULL;
cmph_uint32 i; cmph_uint32 i;
@ -36,7 +36,7 @@ cmph_t *__cmph_load(FILE *f)
char *ptr = algo_name; char *ptr = algo_name;
CMPH_ALGO algo = CMPH_COUNT; CMPH_ALGO algo = CMPH_COUNT;
register size_t nbytes; register size_t nbytes;
DEBUGP("Loading mphf\n"); DEBUGP("Loading mphf\n");
while(1) while(1)
{ {
@ -52,7 +52,7 @@ cmph_t *__cmph_load(FILE *f)
algo = i; algo = i;
} }
} }
if (algo == CMPH_COUNT) if (algo == CMPH_COUNT)
{ {
DEBUGP("Algorithm %s not found\n", algo_name); DEBUGP("Algorithm %s not found\n", algo_name);
return NULL; return NULL;
@ -65,5 +65,3 @@ cmph_t *__cmph_load(FILE *f)
return mphf; return mphf;
} }

View File

@ -4,6 +4,7 @@
djb2_state_t *djb2_state_new() djb2_state_t *djb2_state_new()
{ {
djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t)); djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
if (!djb2_state) return NULL;
state->hashfunc = CMPH_HASH_DJB2; state->hashfunc = CMPH_HASH_DJB2;
return state; return state;
} }
@ -18,7 +19,7 @@ cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen)
register cmph_uint32 hash = 5381; register cmph_uint32 hash = 5381;
const unsigned char *ptr = (unsigned char *)k; const unsigned char *ptr = (unsigned char *)k;
cmph_uint32 i = 0; cmph_uint32 i = 0;
while (i < keylen) while (i < keylen)
{ {
hash = hash*33 ^ *ptr; hash = hash*33 ^ *ptr;
++ptr, ++i; ++ptr, ++i;

View File

@ -23,7 +23,7 @@ fch_config_data_t *fch_config_new()
{ {
fch_config_data_t *fch; fch_config_data_t *fch;
fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t)); fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t));
assert(fch); if (!fch) return NULL;
memset(fch, 0, sizeof(fch_config_data_t)); memset(fch, 0, sizeof(fch_config_data_t));
fch->hashfuncs[0] = CMPH_HASH_JENKINS; fch->hashfuncs[0] = CMPH_HASH_JENKINS;
fch->hashfuncs[1] = CMPH_HASH_JENKINS; fch->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -50,7 +50,7 @@ void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 2) break; //fch only uses two hash functions if (i >= 2) break; //fch only uses two hash functions
fch->hashfuncs[i] = *hashptr; fch->hashfuncs[i] = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -88,36 +88,36 @@ static fch_buckets_t * mapping(cmph_config_t *mph)
fch_buckets_t *buckets = NULL; fch_buckets_t *buckets = NULL;
fch_config_data_t *fch = (fch_config_data_t *)mph->data; fch_config_data_t *fch = (fch_config_data_t *)mph->data;
if (fch->h1) hash_state_destroy(fch->h1); if (fch->h1) hash_state_destroy(fch->h1);
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m); fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
fch->b = fch_calc_b(fch->c, fch->m); fch->b = fch_calc_b(fch->c, fch->m);
fch->p1 = fch_calc_p1(fch->m); fch->p1 = fch_calc_p1(fch->m);
fch->p2 = fch_calc_p2(fch->b); fch->p2 = fch_calc_p2(fch->b);
//DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2); //DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2);
buckets = fch_buckets_new(fch->b); buckets = fch_buckets_new(fch->b);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for(i = 0; i < fch->m; i++) for(i = 0; i < fch->m; i++)
{ {
cmph_uint32 h1, keylen; cmph_uint32 h1, keylen;
char *key = NULL; char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
h1 = hash(fch->h1, key, keylen) % fch->m; h1 = hash(fch->h1, key, keylen) % fch->m;
h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1); h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
fch_buckets_insert(buckets, h1, key, keylen); fch_buckets_insert(buckets, h1, key, keylen);
key = NULL; // transger memory ownership key = NULL; // transger memory ownership
} }
return buckets; return buckets;
} }
/* Ordering step: hand back whatever index array
 * fch_buckets_get_indexes_sorted_by_size() produces for these buckets. */
static cmph_uint32 * ordering(fch_buckets_t * buckets)
{
	cmph_uint32 *sorted_by_size = fch_buckets_get_indexes_sorted_by_size(buckets);

	return sorted_by_size;
}
/* Check whether function h2 causes collisions among the keys of each bucket */ /* Check whether function h2 causes collisions among the keys of each bucket */
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes) static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes)
{ {
//cmph_uint32 max_size = fch_buckets_get_max_size(buckets); //cmph_uint32 max_size = fch_buckets_get_max_size(buckets);
@ -146,7 +146,7 @@ static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t
} }
static void permut(cmph_uint32 * vector, cmph_uint32 n) static void permut(cmph_uint32 * vector, cmph_uint32 n)
{ {
cmph_uint32 i, j, b; cmph_uint32 i, j, b;
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
j = (cmph_uint32) rand() % n; j = (cmph_uint32) rand() % n;
@ -179,12 +179,12 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
{ {
map_table[random_table[i]] = i; map_table[random_table[i]] = i;
} }
do { do {
if (fch->h2) hash_state_destroy(fch->h2); if (fch->h2) hash_state_destroy(fch->h2);
fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m); fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
restart = check_for_collisions_h2(fch, buckets, sorted_indexes); restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
filled_count = 0; filled_count = 0;
if (!restart) if (!restart)
{ {
searching_iterations++; iteration_to_generate_h2 = 0; searching_iterations++; iteration_to_generate_h2 = 0;
//DEBUGP("searching_iterations: %u\n", searching_iterations); //DEBUGP("searching_iterations: %u\n", searching_iterations);
@ -192,7 +192,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
else { else {
iteration_to_generate_h2++; iteration_to_generate_h2++;
//DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2); //DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
} }
for(i = 0; (i < nbuckets) && !restart; i++) { for(i = 0; (i < nbuckets) && !restart; i++) {
cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]); cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]);
if (bucketsize == 0) if (bucketsize == 0)
@ -204,8 +204,8 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
for(z = 0; (z < (fch->m - filled_count)) && restart; z++) { for(z = 0; (z < (fch->m - filled_count)) && restart; z++) {
char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX); char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX);
cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX); cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX);
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m; cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
counter = 0; counter = 0;
restart = 0; // false restart = 0; // false
fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m; fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m;
//DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]); //DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]);
@ -217,7 +217,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
h2 = hash(fch->h2, key, keylen) % fch->m; h2 = hash(fch->h2, key, keylen) % fch->m;
index = (h2 + fch->g[sorted_indexes[i]]) % fch->m; index = (h2 + fch->g[sorted_indexes[i]]) % fch->m;
//DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize); //DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize);
if (map_table[index] >= filled_count) { if (map_table[index] >= filled_count) {
cmph_uint32 y = map_table[index]; cmph_uint32 y = map_table[index];
cmph_uint32 ry = random_table[y]; cmph_uint32 ry = random_table[y];
random_table[y] = random_table[filled_count]; random_table[y] = random_table[filled_count];
@ -225,19 +225,19 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
map_table[random_table[y]] = y; map_table[random_table[y]] = y;
map_table[random_table[filled_count]] = filled_count; map_table[random_table[filled_count]] = filled_count;
filled_count++; filled_count++;
counter ++; counter ++;
} }
else { else {
restart = 1; // true restart = 1; // true
filled_count = filled_count - counter; filled_count = filled_count - counter;
counter = 0; counter = 0;
break; break;
} }
j = (j + 1) % bucketsize; j = (j + 1) % bucketsize;
} while(j % bucketsize != INDEX); } while(j % bucketsize != INDEX);
} }
//getchar(); //getchar();
} }
} while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000)); } while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000));
free(map_table); free(map_table);
free(random_table); free(random_table);
@ -264,7 +264,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c)
fch->h2 = NULL; fch->h2 = NULL;
fch->g = NULL; fch->g = NULL;
do do
{ {
if (mph->verbosity) if (mph->verbosity)
{ {
fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m); fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m);
@ -283,7 +283,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c)
} }
restart_mapping = searching(fch, buckets, sorted_indexes); restart_mapping = searching(fch, buckets, sorted_indexes);
iterations--; iterations--;
} while(restart_mapping && iterations > 0); } while(restart_mapping && iterations > 0);
if (buckets) fch_buckets_destroy(buckets); if (buckets) fch_buckets_destroy(buckets);
if (sorted_indexes) free (sorted_indexes); if (sorted_indexes) free (sorted_indexes);
@ -317,7 +317,7 @@ int fch_dump(cmph_t *mphf, FILE *fd)
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
register size_t nbytes; register size_t nbytes;
fch_data_t *data = (fch_data_t *)mphf->data; fch_data_t *data = (fch_data_t *)mphf->data;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
@ -365,7 +365,7 @@ void fch_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f); nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
fch->h1 = hash_state_load(buf, buflen); fch->h1 = hash_state_load(buf, buflen);
free(buf); free(buf);
//DEBUGP("Loading fch mphf\n"); //DEBUGP("Loading fch mphf\n");
mphf->data = fch; mphf->data = fch;
//DEBUGP("Reading h2\n"); //DEBUGP("Reading h2\n");
@ -376,8 +376,8 @@ void fch_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f); nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
fch->h2 = hash_state_load(buf, buflen); fch->h2 = hash_state_load(buf, buflen);
free(buf); free(buf);
//DEBUGP("Reading m and n\n"); //DEBUGP("Reading m and n\n");
nbytes = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(fch->c), sizeof(double), (size_t)1, f); nbytes = fread(&(fch->c), sizeof(double), (size_t)1, f);
@ -418,7 +418,7 @@ void fch_destroy(cmph_t *mphf)
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf); /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf * \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/ */
void fch_pack(cmph_t *mphf, void *packed_mphf) void fch_pack(cmph_t *mphf, void *packed_mphf)
{ {
@ -450,37 +450,37 @@ void fch_pack(cmph_t *mphf, void *packed_mphf)
// packing b // packing b
*((cmph_uint32 *) ptr) = data->b; *((cmph_uint32 *) ptr) = data->b;
ptr += sizeof(data->b); ptr += sizeof(data->b);
// packing p1 // packing p1
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p1; *((cmph_uint64 *)ptr) = (cmph_uint64)data->p1;
ptr += sizeof(data->p1); ptr += sizeof(data->p1);
// packing p2 // packing p2
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p2; *((cmph_uint64 *)ptr) = (cmph_uint64)data->p2;
ptr += sizeof(data->p2); ptr += sizeof(data->p2);
// packing g // packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b)); memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
} }
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf); /** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf. * \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf * \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 fch_packed_size(cmph_t *mphf) cmph_uint32 fch_packed_size(cmph_t *mphf)
{ {
fch_data_t *data = (fch_data_t *)mphf->data; fch_data_t *data = (fch_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->h1); CMPH_HASH h1_type = hash_get_type(data->h1);
CMPH_HASH h2_type = hash_get_type(data->h2); CMPH_HASH h2_type = hash_get_type(data->h2);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
4*sizeof(cmph_uint32) + 2*sizeof(double) + sizeof(cmph_uint32)*(data->b)); 4*sizeof(cmph_uint32) + 2*sizeof(double) + sizeof(cmph_uint32)*(data->b));
} }
/** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); /** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search. * \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf * \param packed_mphf pointer to the packed mphf
* \param key key to be hashed * \param key key to be hashed
* \param keylen key length in bytes * \param keylen key length in bytes
@ -495,12 +495,12 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4; h2_ptr += 4;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 b = *g_ptr++; register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 b = *g_ptr++;
register double p1 = (double)(*((cmph_uint64 *)g_ptr)); register double p1 = (double)(*((cmph_uint64 *)g_ptr));
g_ptr += 2; g_ptr += 2;
@ -508,10 +508,9 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register double p2 = (double)(*((cmph_uint64 *)g_ptr)); register double p2 = (double)(*((cmph_uint64 *)g_ptr));
g_ptr += 2; g_ptr += 2;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
h1 = mixh10h11h12 (b, p1, p2, h1); h1 = mixh10h11h12 (b, p1, p2, h1);
return (h2 + g_ptr[h1]) % m; return (h2 + g_ptr[h1]) % m;
} }

View File

@ -20,7 +20,7 @@ typedef struct __fch_bucket_t
static void fch_bucket_new(fch_bucket_t *bucket) static void fch_bucket_new(fch_bucket_t *bucket)
{ {
assert(bucket); assert(bucket);
bucket->size = 0; bucket->size = 0;
@ -109,16 +109,16 @@ struct __fch_buckets_t
{ {
fch_bucket_t * values; fch_bucket_t * values;
cmph_uint32 nbuckets, max_size; cmph_uint32 nbuckets, max_size;
}; };
fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets) fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets)
{ {
cmph_uint32 i; cmph_uint32 i;
fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t)); fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t));
assert(buckets); if (!buckets) return NULL;
buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t)); buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t));
for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i); for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
assert(buckets->values); assert(buckets->values);
buckets->nbuckets = nbuckets; buckets->nbuckets = nbuckets;
buckets->max_size = 0; buckets->max_size = 0;
@ -135,7 +135,7 @@ void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key,
{ {
assert(index < buckets->nbuckets); assert(index < buckets->nbuckets);
fch_bucket_insert(buckets->values + index, key, length); fch_bucket_insert(buckets->values + index, key, length);
if (fch_bucket_size(buckets->values + index) > buckets->max_size) if (fch_bucket_size(buckets->values + index) > buckets->max_size)
{ {
buckets->max_size = fch_bucket_size(buckets->values + index); buckets->max_size = fch_bucket_size(buckets->values + index);
} }
@ -170,16 +170,16 @@ cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets)
return buckets->nbuckets; return buckets->nbuckets;
} }
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets) cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
{ {
cmph_uint32 i = 0; cmph_uint32 i = 0;
cmph_uint32 sum = 0, value; cmph_uint32 sum = 0, value;
cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32)); cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32));
cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32)); cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32));
// collect how many buckets for each size. // collect how many buckets for each size.
for(i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++; for(i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
// calculating offset considering a decreasing order of buckets size. // calculating offset considering a decreasing order of buckets size.
value = nbuckets_size[buckets->max_size]; value = nbuckets_size[buckets->max_size];
nbuckets_size[buckets->max_size] = sum; nbuckets_size[buckets->max_size] = sum;
@ -188,13 +188,13 @@ cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
sum += value; sum += value;
value = nbuckets_size[i]; value = nbuckets_size[i];
nbuckets_size[i] = sum; nbuckets_size[i] = sum;
} }
for(i = 0; i < buckets->nbuckets; i++) for(i = 0; i < buckets->nbuckets; i++)
{ {
sorted_indexes[nbuckets_size[fch_bucket_size(buckets->values + i)]] = (cmph_uint32)i; sorted_indexes[nbuckets_size[fch_bucket_size(buckets->values + i)]] = (cmph_uint32)i;
nbuckets_size[fch_bucket_size(buckets->values + i)] ++; nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
} }
free(nbuckets_size); free(nbuckets_size);
return sorted_indexes; return sorted_indexes;
} }
@ -208,7 +208,7 @@ void fch_buckets_print(fch_buckets_t * buckets)
/* Tear down a bucket collection: destroy each bucket in index order,
 * then free the bucket array and finally the container itself. */
void fch_buckets_destroy(fch_buckets_t * buckets)
{
	cmph_uint32 idx = 0;

	while (idx < buckets->nbuckets)
	{
		fch_bucket_destroy(&buckets->values[idx]);
		++idx;
	}
	free(buckets->values);
	free(buckets);
}

View File

@ -4,6 +4,7 @@
fnv_state_t *fnv_state_new() fnv_state_t *fnv_state_new()
{ {
fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t)); fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
if (!state) return NULL;
state->hashfunc = CMPH_HASH_FNV; state->hashfunc = CMPH_HASH_FNV;
return state; return state;
} }
@ -15,13 +16,13 @@ void fnv_state_destroy(fnv_state_t *state)
cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen) cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen)
{ {
const unsigned char *bp = (const unsigned char *)k; const unsigned char *bp = (const unsigned char *)k;
const unsigned char *be = bp + keylen; const unsigned char *be = bp + keylen;
static unsigned int hval = 0; static unsigned int hval = 0;
while (bp < be) while (bp < be)
{ {
//hval *= 0x01000193; good for non-gcc compiler //hval *= 0x01000193; good for non-gcc compiler
hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); //good for gcc hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); //good for gcc
@ -41,6 +42,7 @@ void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen)
/** \fn fnv_state_t *fnv_state_copy(fnv_state_t *src_state)
 *  \brief Allocate a new state carrying the same hashfunc tag as src_state.
 *  \param src_state state to copy from; only read when the allocation succeeds
 *  \return the newly allocated copy, or NULL when the allocation fails
 */
fnv_state_t * fnv_state_copy(fnv_state_t *src_state)
{
	fnv_state_t *clone = malloc(sizeof *clone);

	if (clone != NULL)
	{
		clone->hashfunc = src_state->hashfunc;
	}
	return clone;
}

View File

@ -77,7 +77,7 @@ void graph_print(graph_t *g)
printf("%u -> %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]); printf("%u -> %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
} }
} }
} }
return; return;
} }
@ -130,7 +130,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
DEBUGP("Deleting edge point %u %u\n", v1, v2); DEBUGP("Deleting edge point %u %u\n", v1, v2);
e = g->first[v1]; e = g->first[v1];
if (check_edge(g, e, v1, v2)) if (check_edge(g, e, v1, v2))
{ {
g->first[v1] = g->next[e]; g->first[v1] = g->next[e];
//g->edges[e] = EMPTY; //g->edges[e] = EMPTY;
@ -151,7 +151,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
DEBUGP("Deleted\n"); DEBUGP("Deleted\n");
} }
void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2) void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
{ {
g->shrinking = 1; g->shrinking = 1;
@ -163,7 +163,7 @@ void graph_clear_edges(graph_t *g)
{ {
cmph_uint32 i; cmph_uint32 i;
for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY; for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY;
for (i = 0; i < g->nedges*2; ++i) for (i = 0; i < g->nedges*2; ++i)
{ {
g->edges[i] = EMPTY; g->edges[i] = EMPTY;
g->next[i] = EMPTY; g->next[i] = EMPTY;
@ -178,7 +178,7 @@ static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *delet
cmph_uint8 found = 0; cmph_uint8 found = 0;
DEBUGP("Checking degree of vertex %u connected to edge %u\n", v, edge); DEBUGP("Checking degree of vertex %u connected to edge %u\n", v, edge);
if (edge == EMPTY) return 0; if (edge == EMPTY) return 0;
else if (!(GETBIT(deleted, abs_edge(edge, 0)))) else if (!(GETBIT(deleted, abs_edge(edge, 0))))
{ {
found = 1; found = 1;
*e = edge; *e = edge;
@ -206,17 +206,17 @@ static void cyclic_del_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted)
degree1 = find_degree1_edge(g, v1, deleted, &e); degree1 = find_degree1_edge(g, v1, deleted, &e);
if (!degree1) return; if (!degree1) return;
while(1) while(1)
{ {
DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]); DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
SETBIT(deleted, abs_edge(e, 0)); SETBIT(deleted, abs_edge(e, 0));
v2 = g->edges[abs_edge(e, 0)]; v2 = g->edges[abs_edge(e, 0)];
if (v2 == v1) v2 = g->edges[abs_edge(e, 1)]; if (v2 == v1) v2 = g->edges[abs_edge(e, 1)];
DEBUGP("Checking if second endpoint %u has degree 1\n", v2); DEBUGP("Checking if second endpoint %u has degree 1\n", v2);
degree1 = find_degree1_edge(g, v2, deleted, &e); degree1 = find_degree1_edge(g, v2, deleted, &e);
if (degree1) if (degree1)
{ {
DEBUGP("Inspecting vertex %u\n", v2); DEBUGP("Inspecting vertex %u\n", v2);
v1 = v2; v1 = v2;
@ -240,7 +240,7 @@ int graph_is_cyclic(graph_t *g)
} }
for (i = 0; i < g->nedges; ++i) for (i = 0; i < g->nedges; ++i)
{ {
if (!(GETBIT(deleted, i))) if (!(GETBIT(deleted, i)))
{ {
DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]); DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]);
free(deleted); free(deleted);
@ -275,15 +275,15 @@ void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
for (i = 0; i < g->nedges; ++i) for (i = 0; i < g->nedges; ++i)
{ {
if (!(GETBIT(deleted,i))) if (!(GETBIT(deleted,i)))
{ {
DEBUGP("Edge %u %u->%u belongs to the 2-core\n", i, g->edges[i], g->edges[i + g->nedges]); DEBUGP("Edge %u %u->%u belongs to the 2-core\n", i, g->edges[i], g->edges[i + g->nedges]);
if(!(GETBIT(g->critical_nodes,g->edges[i]))) if(!(GETBIT(g->critical_nodes,g->edges[i])))
{ {
g->ncritical_nodes ++; g->ncritical_nodes ++;
SETBIT(g->critical_nodes,g->edges[i]); SETBIT(g->critical_nodes,g->edges[i]);
} }
if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges]))) if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges])))
{ {
g->ncritical_nodes ++; g->ncritical_nodes ++;
SETBIT(g->critical_nodes,g->edges[i + g->nedges]); SETBIT(g->critical_nodes,g->edges[i + g->nedges]);
@ -328,11 +328,9 @@ graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v)
/* Advance a neighbor iterator by one edge.
 *
 * Returns GRAPH_NO_NEIGHBOR when the iterator's edge is EMPTY. Otherwise it
 * looks at the edge slot it->edge and its partner slot it->edge + g->nedges,
 * returns whichever one is not it->vertex, and moves the iterator on to
 * g->next[it->edge].
 */
cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
{
	cmph_uint32 neighbor;

	if (it->edge == EMPTY)
	{
		return GRAPH_NO_NEIGHBOR;
	}

	/* If this slot holds the iterating vertex, the neighbor is in the partner slot. */
	neighbor = (g->edges[it->edge] == it->vertex)
		? g->edges[it->edge + g->nedges]
		: g->edges[it->edge];

	it->edge = g->next[it->edge];
	return neighbor;
}

View File

@ -133,7 +133,7 @@ void hash_state_destroy(hash_state_t *state)
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. * \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* \param state points to the hash function * \param state points to the hash function
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size() * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
* *
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* However, the hash function type must be packed outside. * However, the hash function type must be packed outside.
*/ */
@ -142,20 +142,20 @@ void hash_state_pack(hash_state_t *state, void *hash_packed)
switch (state->hashfunc) switch (state->hashfunc)
{ {
case CMPH_HASH_JENKINS: case CMPH_HASH_JENKINS:
// pack the jenkins hash function // pack the jenkins hash function
jenkins_state_pack((jenkins_state_t *)state, hash_packed); jenkins_state_pack((jenkins_state_t *)state, hash_packed);
break; break;
default: default:
assert(0); assert(0);
} }
return; return;
} }
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) /** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
* \brief Return the amount of space needed to pack a hash function. * \brief Return the amount of space needed to pack a hash function.
* \param hashfunc function type * \param hashfunc function type
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
{ {
cmph_uint32 size = 0; cmph_uint32 size = 0;
@ -197,7 +197,7 @@ cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cm
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers. * \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
*/ */
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
{ {
switch (hashfunc) switch (hashfunc)
{ {
case CMPH_HASH_JENKINS: case CMPH_HASH_JENKINS:

View File

@ -41,7 +41,7 @@ void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT) while(*hashptr != CMPH_HASH_COUNT)
{ {
if (i >= 3) break; //hashtree only uses three hash functions if (i >= 3) break; //hashtree only uses three hash functions
hashtree->hashfuncs[i] = *hashptr; hashtree->hashfuncs[i] = *hashptr;
++i, ++hashptr; ++i, ++hashptr;
} }
} }
@ -55,8 +55,8 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
cmph_uint32 iterations = 20; cmph_uint32 iterations = 20;
cmph_uint8 *visited = NULL; cmph_uint8 *visited = NULL;
hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data; hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
hashtree->m = mph->key_source->nkeys; hashtree->m = mph->key_source->nkeys;
hashtree->n = ceil(c * mph->key_source->nkeys); hashtree->n = ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c);
hashtree->graph = graph_new(hashtree->n, hashtree->m); hashtree->graph = graph_new(hashtree->n, hashtree->m);
DEBUGP("Created graph\n"); DEBUGP("Created graph\n");
@ -87,12 +87,12 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations); fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
} }
if (iterations == 0) break; if (iterations == 0) break;
} }
else break; else break;
} }
if (iterations == 0) if (iterations == 0)
{ {
graph_destroy(hashtree->graph); graph_destroy(hashtree->graph);
return NULL; return NULL;
} }
@ -115,7 +115,7 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
hashtree_traverse(hashtree, visited, i); hashtree_traverse(hashtree, visited, i);
} }
} }
graph_destroy(hashtree->graph); graph_destroy(hashtree->graph);
free(visited); free(visited);
hashtree->graph = NULL; hashtree->graph = NULL;
@ -144,7 +144,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi
graph_iterator_t it = graph_neighbors_it(hashtree->graph, v); graph_iterator_t it = graph_neighbors_it(hashtree->graph, v);
cmph_uint32 neighbor = 0; cmph_uint32 neighbor = 0;
SETBIT(visited,v); SETBIT(visited,v);
DEBUGP("Visiting vertex %u\n", v); DEBUGP("Visiting vertex %u\n", v);
while((neighbor = graph_next_neighbor(hashtree->graph, &it)) != GRAPH_NO_NEIGHBOR) while((neighbor = graph_next_neighbor(hashtree->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
@ -157,7 +157,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi
hashtree_traverse(hashtree, visited, neighbor); hashtree_traverse(hashtree, visited, neighbor);
} }
} }
static int hashtree_gen_edges(cmph_config_t *mph) static int hashtree_gen_edges(cmph_config_t *mph)
{ {
cmph_uint32 e; cmph_uint32 e;
@ -165,7 +165,7 @@ static int hashtree_gen_edges(cmph_config_t *mph)
int cycles = 0; int cycles = 0;
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", hashtree->n, cmph_hash_names[hashtree->hashfuncs[0]], cmph_hash_names[hashtree->hashfuncs[1]]); DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", hashtree->n, cmph_hash_names[hashtree->hashfuncs[0]], cmph_hash_names[hashtree->hashfuncs[1]]);
graph_clear_edges(hashtree->graph); graph_clear_edges(hashtree->graph);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e) for (e = 0; e < mph->key_source->nkeys; ++e)
{ {
@ -176,7 +176,7 @@ static int hashtree_gen_edges(cmph_config_t *mph)
h1 = hash(hashtree->hashes[0], key, keylen) % hashtree->n; h1 = hash(hashtree->hashes[0], key, keylen) % hashtree->n;
h2 = hash(hashtree->hashes[1], key, keylen) % hashtree->n; h2 = hash(hashtree->hashes[1], key, keylen) % hashtree->n;
if (h1 == h2) if (++h2 >= hashtree->n) h2 = 0; if (h1 == h2) if (++h2 >= hashtree->n) h2 = 0;
if (h1 == h2) if (h1 == h2)
{ {
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -216,7 +216,7 @@ int hashtree_dump(cmph_t *mphf, FILE *fd)
fwrite(&(data->n), sizeof(cmph_uint32), 1, fd); fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd); fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd); fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd);
#ifdef DEBUG #ifdef DEBUG
fprintf(stderr, "G: "); fprintf(stderr, "G: ");
@ -253,8 +253,8 @@ void hashtree_load(FILE *f, cmph_t *mphf)
} }
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
fread(&(hashtree->n), sizeof(cmph_uint32), 1, f); fread(&(hashtree->n), sizeof(cmph_uint32), 1, f);
fread(&(hashtree->m), sizeof(cmph_uint32), 1, f); fread(&(hashtree->m), sizeof(cmph_uint32), 1, f);
hashtree->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*hashtree->n); hashtree->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*hashtree->n);
fread(hashtree->g, hashtree->n*sizeof(cmph_uint32), 1, f); fread(hashtree->g, hashtree->n*sizeof(cmph_uint32), 1, f);
@ -265,7 +265,7 @@ void hashtree_load(FILE *f, cmph_t *mphf)
#endif #endif
return; return;
} }
cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{ {
@ -280,7 +280,7 @@ cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void hashtree_destroy(cmph_t *mphf) void hashtree_destroy(cmph_t *mphf)
{ {
hashtree_data_t *data = (hashtree_data_t *)mphf->data; hashtree_data_t *data = (hashtree_data_t *)mphf->data;
free(data->g); free(data->g);
hash_state_destroy(data->hashes[0]); hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]); hash_state_destroy(data->hashes[1]);
free(data->hashes); free(data->hashes);

View File

@ -28,16 +28,16 @@
have at least 1/4 probability of changing. have at least 1/4 probability of changing.
* If mix() is run forward, every bit of c will change between 1/3 and * If mix() is run forward, every bit of c will change between 1/3 and
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
mix() was built out of 36 single-cycle latency instructions in a mix() was built out of 36 single-cycle latency instructions in a
structure that could support 2x parallelism, like so: structure that could support 2x parallelism, like so:
a -= b; a -= b;
a -= c; x = (c>>13); a -= c; x = (c>>13);
b -= c; a ^= x; b -= c; a ^= x;
b -= a; x = (a<<8); b -= a; x = (a<<8);
c -= a; b ^= x; c -= a; b ^= x;
c -= b; x = (b>>13); c -= b; x = (b>>13);
... ...
Unfortunately, superscalar Pentiums and Sparcs can't take advantage Unfortunately, superscalar Pentiums and Sparcs can't take advantage
of that parallelism. They've also turned some of those single-cycle of that parallelism. They've also turned some of those single-cycle
latency instructions into multi-cycle latency instructions. Still, latency instructions into multi-cycle latency instructions. Still,
this is the fastest good hash I could find. There were about 2^^68 this is the fastest good hash I could find. There were about 2^^68
@ -87,6 +87,7 @@ acceptable. Do NOT use for cryptographic purposes.
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
{ {
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
if (!state) return NULL;
DEBUGP("Initializing jenkins hash\n"); DEBUGP("Initializing jenkins hash\n");
state->seed = ((cmph_uint32)rand() % size); state->seed = ((cmph_uint32)rand() % size);
return state; return state;
@ -121,28 +122,28 @@ static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_u
hashes[2] += length; hashes[2] += length;
switch(len) /* all the case statements fall through */ switch(len) /* all the case statements fall through */
{ {
case 11: case 11:
hashes[2] +=((cmph_uint32)k[10]<<24); hashes[2] +=((cmph_uint32)k[10]<<24);
case 10: case 10:
hashes[2] +=((cmph_uint32)k[9]<<16); hashes[2] +=((cmph_uint32)k[9]<<16);
case 9 : case 9 :
hashes[2] +=((cmph_uint32)k[8]<<8); hashes[2] +=((cmph_uint32)k[8]<<8);
/* the first byte of hashes[2] is reserved for the length */ /* the first byte of hashes[2] is reserved for the length */
case 8 : case 8 :
hashes[1] +=((cmph_uint32)k[7]<<24); hashes[1] +=((cmph_uint32)k[7]<<24);
case 7 : case 7 :
hashes[1] +=((cmph_uint32)k[6]<<16); hashes[1] +=((cmph_uint32)k[6]<<16);
case 6 : case 6 :
hashes[1] +=((cmph_uint32)k[5]<<8); hashes[1] +=((cmph_uint32)k[5]<<8);
case 5 : case 5 :
hashes[1] +=(cmph_uint8) k[4]; hashes[1] +=(cmph_uint8) k[4];
case 4 : case 4 :
hashes[0] +=((cmph_uint32)k[3]<<24); hashes[0] +=((cmph_uint32)k[3]<<24);
case 3 : case 3 :
hashes[0] +=((cmph_uint32)k[2]<<16); hashes[0] +=((cmph_uint32)k[2]<<16);
case 2 : case 2 :
hashes[0] +=((cmph_uint32)k[1]<<8); hashes[0] +=((cmph_uint32)k[1]<<8);
case 1 : case 1 :
hashes[0] +=(cmph_uint8)k[0]; hashes[0] +=(cmph_uint8)k[0];
/* case 0: nothing left to add */ /* case 0: nothing left to add */
} }
@ -158,13 +159,13 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
/* cmph_uint32 a, b, c; /* cmph_uint32 a, b, c;
cmph_uint32 len, length; cmph_uint32 len, length;
// Set up the internal state // Set up the internal state
length = keylen; length = keylen;
len = length; len = length;
a = b = 0x9e3779b9; // the golden ratio; an arbitrary value a = b = 0x9e3779b9; // the golden ratio; an arbitrary value
c = state->seed; // the previous hash value - seed in our case c = state->seed; // the previous hash value - seed in our case
// handle most of the key // handle most of the key
while (len >= 12) while (len >= 12)
{ {
a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
@ -176,37 +177,37 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
// handle the last 11 bytes // handle the last 11 bytes
c += length; c += length;
switch(len) /// all the case statements fall through switch(len) /// all the case statements fall through
{ {
case 11: case 11:
c +=((cmph_uint32)k[10]<<24); c +=((cmph_uint32)k[10]<<24);
case 10: case 10:
c +=((cmph_uint32)k[9]<<16); c +=((cmph_uint32)k[9]<<16);
case 9 : case 9 :
c +=((cmph_uint32)k[8]<<8); c +=((cmph_uint32)k[8]<<8);
// the first byte of c is reserved for the length // the first byte of c is reserved for the length
case 8 : case 8 :
b +=((cmph_uint32)k[7]<<24); b +=((cmph_uint32)k[7]<<24);
case 7 : case 7 :
b +=((cmph_uint32)k[6]<<16); b +=((cmph_uint32)k[6]<<16);
case 6 : case 6 :
b +=((cmph_uint32)k[5]<<8); b +=((cmph_uint32)k[5]<<8);
case 5 : case 5 :
b +=k[4]; b +=k[4];
case 4 : case 4 :
a +=((cmph_uint32)k[3]<<24); a +=((cmph_uint32)k[3]<<24);
case 3 : case 3 :
a +=((cmph_uint32)k[2]<<16); a +=((cmph_uint32)k[2]<<16);
case 2 : case 2 :
a +=((cmph_uint32)k[1]<<8); a +=((cmph_uint32)k[1]<<8);
case 1 : case 1 :
a +=k[0]; a +=k[0];
// case 0: nothing left to add // case 0: nothing left to add
} }
mix(a,b,c); mix(a,b,c);
/// report the result /// report the result
return c; return c;
*/ */
@ -221,7 +222,7 @@ void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
{ {
*buflen = sizeof(cmph_uint32); *buflen = sizeof(cmph_uint32);
*buf = (char *)malloc(sizeof(cmph_uint32)); *buf = (char *)malloc(sizeof(cmph_uint32));
if (!*buf) if (!*buf)
{ {
*buflen = UINT_MAX; *buflen = UINT_MAX;
return; return;
@ -252,7 +253,7 @@ jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed); /** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed to by jenkins_packed. * \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed to by jenkins_packed.
* \param state points to the jenkins function * \param state points to the jenkins function
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() * \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
*/ */
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed) void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
{ {
@ -265,7 +266,7 @@ void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
/** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state); /** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state);
* \brief Return the amount of space needed to pack a jenkins function. * \brief Return the amount of space needed to pack a jenkins function.
* \return the size of the packed function or zero for failures * \return the size of the packed function or zero for failures
*/ */
cmph_uint32 jenkins_state_packed_size(void) cmph_uint32 jenkins_state_packed_size(void)
{ {
return sizeof(cmph_uint32); return sizeof(cmph_uint32);

View File

@ -12,6 +12,7 @@ struct __linear_string_map_t {
lsmap_t *lsmap_new() { lsmap_t *lsmap_new() {
lsmap_t* lsmap = (lsmap_t*)malloc(sizeof(lsmap_t)); lsmap_t* lsmap = (lsmap_t*)malloc(sizeof(lsmap_t));
if (!lsmap) return NULL;
lsmap->key = "dummy node"; lsmap->key = "dummy node";
lsmap->next = NULL; lsmap->next = NULL;
return lsmap; return lsmap;
@ -42,7 +43,7 @@ void* lsmap_search(lsmap_t *lsmap, const char *key) {
} }
return NULL; return NULL;
} }
void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) { void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) {
while (lsmap->next != NULL) { while (lsmap->next != NULL) {
f(lsmap->key); f(lsmap->key);
@ -65,4 +66,3 @@ void lsmap_destroy(lsmap_t *lsmap) {
} }
free(lsmap); free(lsmap);
} }

View File

@ -22,13 +22,13 @@
void usage(const char *prg) void usage(const char *prg)
{ {
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
} }
void usage_long(const char *prg) void usage_long(const char *prg)
{ {
cmph_uint32 i; cmph_uint32 i;
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n"); fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, " -h\t print this help message\n"); fprintf(stderr, " -h\t print this help message\n");
fprintf(stderr, " -c\t c value determines:\n"); fprintf(stderr, " -c\t c value determines:\n");
fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n"); fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n");
@ -57,7 +57,7 @@ void usage_long(const char *prg)
fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n"); fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n");
fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n"); fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n");
fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n"); fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n");
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n"); fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n"); fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n");
fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n"); fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n");
fprintf(stderr, " \t range [1,128]. Defaul is 1\n"); fprintf(stderr, " \t range [1,128]. Defaul is 1\n");
@ -182,7 +182,7 @@ int main(int argc, char **argv)
break; break;
} }
} }
if (!valid) if (!valid)
{ {
fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION); fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
return -1; return -1;
@ -204,7 +204,7 @@ int main(int argc, char **argv)
break; break;
} }
} }
if (!valid) if (!valid)
{ {
fprintf(stderr, "Invalid hash function: %s\n", optarg); fprintf(stderr, "Invalid hash function: %s\n", optarg);
return -1; return -1;
@ -223,7 +223,7 @@ int main(int argc, char **argv)
return 1; return 1;
} }
keys_file = argv[optind]; keys_file = argv[optind];
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL); if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
srand(seed); srand(seed);
int ret = 0; int ret = 0;
@ -232,7 +232,7 @@ int main(int argc, char **argv)
mphf_file = (char *)malloc(strlen(keys_file) + 5); mphf_file = (char *)malloc(strlen(keys_file) + 5);
memcpy(mphf_file, keys_file, strlen(keys_file)); memcpy(mphf_file, keys_file, strlen(keys_file));
memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5); memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
} }
keys_fd = fopen(keys_file, "r"); keys_fd = fopen(keys_file, "r");
@ -258,7 +258,7 @@ int main(int argc, char **argv)
cmph_config_set_memory_availability(config, memory_availability); cmph_config_set_memory_availability(config, memory_availability);
cmph_config_set_b(config, b); cmph_config_set_b(config, b);
cmph_config_set_keys_per_bin(config, keys_per_bin); cmph_config_set_keys_per_bin(config, keys_per_bin);
//if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15; //if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15; if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c); if (c != 0) cmph_config_set_graphsize(config, c);
@ -279,8 +279,8 @@ int main(int argc, char **argv)
free(mphf_file); free(mphf_file);
return -1; return -1;
} }
cmph_dump(mphf, mphf_fd); cmph_dump(mphf, mphf_fd);
cmph_destroy(mphf); cmph_destroy(mphf);
fclose(mphf_fd); fclose(mphf_fd);
} }
else else
@ -329,7 +329,7 @@ int main(int argc, char **argv)
} }
source->dispose(source->data, buf, buflen); source->dispose(source->data, buf, buflen);
} }
cmph_destroy(mphf); cmph_destroy(mphf);
free(hashtable); free(hashtable);
} }
@ -338,5 +338,5 @@ int main(int argc, char **argv)
free(tmp_dir); free(tmp_dir);
cmph_io_nlfile_adapter_destroy(source); cmph_io_nlfile_adapter_destroy(source);
return ret; return ret;
} }

View File

@ -4,6 +4,7 @@
sdbm_state_t *sdbm_state_new() sdbm_state_t *sdbm_state_new()
{ {
sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t)); sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
if (!state) return NULL;
state->hashfunc = CMPH_HASH_SDBM; state->hashfunc = CMPH_HASH_SDBM;
return state; return state;
} }

View File

@ -12,7 +12,7 @@ vqueue_t * vqueue_new(cmph_uint32 capacity)
{ {
size_t capacity_plus_one = capacity + 1; size_t capacity_plus_one = capacity + 1;
vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t)); vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
assert(q); if (!q) return NULL;
q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32)); q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32));
q->beg = q->end = 0; q->beg = q->end = 0;
q->capacity = (cmph_uint32) capacity_plus_one; q->capacity = (cmph_uint32) capacity_plus_one;
@ -43,7 +43,7 @@ void vqueue_print(vqueue_t * q)
cmph_uint32 i; cmph_uint32 i;
for (i = q->beg; i != q->end; i = (i + 1)%q->capacity) for (i = q->beg; i != q->end; i = (i + 1)%q->capacity)
fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]); fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]);
} }
void vqueue_destroy(vqueue_t *q) void vqueue_destroy(vqueue_t *q)
{ {

View File

@ -76,4 +76,3 @@ void vstack_reserve(vstack_t *stack, cmph_uint32 size)
DEBUGP("Increased\n"); DEBUGP("Increased\n");
} }
} }