1
Fork 0

Aesthetics in C code and replaced some asserts with NULL returns.

This commit is contained in:
Davi de Castro Reis 2011-12-26 19:35:30 -02:00
parent 4e4d36d833
commit 24e645febe
27 changed files with 649 additions and 656 deletions

View File

@ -35,9 +35,9 @@ const cmph_uint8 bdz_lookup_table[] =
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
};
};
typedef struct
typedef struct
{
cmph_uint32 vertices[3];
cmph_uint32 next_edges[3];
@ -54,12 +54,12 @@ static void bdz_free_queue(bdz_queue_t * queue)
free(*queue);
};
typedef struct
typedef struct
{
cmph_uint32 nedges;
bdz_edge_t * edges;
cmph_uint32 * first_edge;
cmph_uint8 * vert_degree;
cmph_uint8 * vert_degree;
}bdz_graph3_t;
@ -67,7 +67,7 @@ static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uin
{
graph3->edges=malloc(nedges*sizeof(bdz_edge_t));
graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
graph3->vert_degree=malloc((size_t)nvertices);
graph3->vert_degree=malloc((size_t)nvertices);
};
static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
@ -136,7 +136,7 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
j=0;
} else if(graph3->edges[edge1].vertices[1]==vert){
j=1;
} else
} else
j=2;
edge1=graph3->edges[edge1].next_edges[j];
};
@ -145,16 +145,16 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
exit(-1);
};
if(edge2!=NULL_EDGE){
graph3->edges[edge2].next_edges[j] =
graph3->edges[edge2].next_edges[j] =
graph3->edges[edge1].next_edges[i];
} else
} else
graph3->first_edge[vert]=
graph3->edges[edge1].next_edges[i];
graph3->vert_degree[vert]--;
};
};
static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
@ -170,7 +170,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
v0=graph3->edges[i].vertices[0];
v1=graph3->edges[i].vertices[1];
v2=graph3->edges[i].vertices[2];
if(graph3->vert_degree[v0]==1 ||
if(graph3->vert_degree[v0]==1 ||
graph3->vert_degree[v1]==1 ||
graph3->vert_degree[v2]==1){
if(!GETBIT(marked_edge,i)) {
@ -196,7 +196,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge);
};
};
if(graph3->vert_degree[v1]==1) {
tmp_edge=graph3->first_edge[v1];
@ -204,7 +204,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge);
};
};
if(graph3->vert_degree[v2]==1){
tmp_edge=graph3->first_edge[v2];
@ -227,7 +227,7 @@ bdz_config_data_t *bdz_config_new(void)
{
bdz_config_data_t *bdz;
bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
assert(bdz);
if (!bdz) return NULL;
memset(bdz, 0, sizeof(bdz_config_data_t));
bdz->hashfunc = CMPH_HASH_JENKINS;
bdz->g = NULL;
@ -328,10 +328,10 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
}
else break;
}
if (iterations == 0)
{
bdz_free_queue(&edges);
@ -353,7 +353,7 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
}
ranking(bdz);
#ifdef CMPH_TIMING
#ifdef CMPH_TIMING
ELAPSED_TIME_IN_SECONDS(&construction_time);
#endif
mphf = (cmph_t *)malloc(sizeof(cmph_t));
@ -381,17 +381,17 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
}
#ifdef CMPH_TIMING
#ifdef CMPH_TIMING
register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
register cmph_uint32 keys_per_bucket = 1;
construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
#endif
#endif
return mphf;
}
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
{
cmph_uint32 e;
@ -405,7 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
cmph_uint32 h0, h1, h2;
cmph_uint32 keylen;
char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen);
mph->key_source->read(mph->key_source->data, &key, &keylen);
hash_vector(bdz->hl, key, keylen,hl);
h0 = hl[0] % bdz->r;
h1 = hl[1] % bdz->r + bdz->r;
@ -414,7 +414,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
mph->key_source->dispose(mph->key_source->data, key, keylen);
bdz_add_edge(graph3,h0,h1,h2);
}
cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);
cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);
return (cycles == 0);
}
@ -426,7 +426,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
cmph_uint32 v0,v1,v2;
cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1);
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1);
memset(bdz->g, 0xff, (size_t)(sizeg));
@ -439,12 +439,12 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
if(!GETBIT(marked_vertices, v0)){
if(!GETBIT(marked_vertices,v1))
{
SETVALUE1(bdz->g, v1, UNASSIGNED);
SETVALUE1(bdz->g, v1, UNASSIGNED);
SETBIT(marked_vertices, v1);
}
if(!GETBIT(marked_vertices,v2))
{
SETVALUE1(bdz->g, v2, UNASSIGNED);
SETVALUE1(bdz->g, v2, UNASSIGNED);
SETBIT(marked_vertices, v2);
}
SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
@ -507,7 +507,7 @@ int bdz_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0);
nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
@ -541,12 +541,12 @@ void bdz_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
bdz->hl = hash_state_load(buf, buflen);
free(buf);
DEBUGP("Reading m and n\n");
nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
sizeg = (cmph_uint32)ceil(bdz->n/4.0);
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
@ -566,7 +566,7 @@ void bdz_load(FILE *f, cmph_t *mphf)
#endif
return;
}
static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
{
@ -578,17 +578,17 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint
while(beg_idx_b < end_idx_b)
{
base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
}
DEBUGP("base rank %u\n", base_rank);
beg_idx_v = beg_idx_b << 2;
DEBUGP("beg_idx_v %u\n", beg_idx_v);
while(beg_idx_v < vertex)
while(beg_idx_v < vertex)
{
if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
beg_idx_v++;
}
return base_rank;
}
@ -610,7 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bdz_destroy(cmph_t *mphf)
{
bdz_data_t *data = (bdz_data_t *)mphf->data;
free(data->g);
free(data->g);
hash_state_destroy(data->hl);
free(data->ranktable);
free(data);
@ -660,18 +660,18 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 bdz_packed_size(cmph_t *mphf)
{
bdz_data_t *data = (bdz_data_t *)mphf->data;
CMPH_HASH hl_type = hash_get_type(data->hl);
CMPH_HASH hl_type = hash_get_type(data->hl);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0)));
}
/** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -679,13 +679,13 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf)
*/
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
register cmph_uint32 vertex;
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
register cmph_uint32 r = *ranktable++;
register cmph_uint32 ranktablesize = *ranktable++;
register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);

View File

@ -24,7 +24,7 @@ static cmph_uint8 lookup_table[5][256] = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
};
typedef struct
typedef struct
{
cmph_uint32 vertices[3];
cmph_uint32 next_edges[3];
@ -41,12 +41,12 @@ static void bdz_ph_free_queue(bdz_ph_queue_t * queue)
free(*queue);
};
typedef struct
typedef struct
{
cmph_uint32 nedges;
bdz_ph_edge_t * edges;
cmph_uint32 * first_edge;
cmph_uint8 * vert_degree;
cmph_uint8 * vert_degree;
}bdz_ph_graph3_t;
@ -54,7 +54,7 @@ static void bdz_ph_alloc_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cm
{
graph3->edges=malloc(nedges*sizeof(bdz_ph_edge_t));
graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
graph3->vert_degree=malloc((size_t)nvertices);
graph3->vert_degree=malloc((size_t)nvertices);
};
static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
{
@ -101,10 +101,10 @@ static void bdz_ph_dump_graph(bdz_ph_graph3_t* graph3, cmph_uint32 nedges, cmph_
printf(" nexts %d %d %d",graph3->edges[i].next_edges[0],
graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
};
for(i=0;i<nvertices;i++){
printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]);
};
};
@ -121,7 +121,7 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
j=0;
} else if(graph3->edges[edge1].vertices[1]==vert){
j=1;
} else
} else
j=2;
edge1=graph3->edges[edge1].next_edges[j];
};
@ -130,16 +130,16 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
exit(-1);
};
if(edge2!=NULL_EDGE){
graph3->edges[edge2].next_edges[j] =
graph3->edges[edge2].next_edges[j] =
graph3->edges[edge1].next_edges[i];
} else
} else
graph3->first_edge[vert]=
graph3->edges[edge1].next_edges[i];
graph3->vert_degree[vert]--;
};
};
static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3)
@ -176,7 +176,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_
queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge);
};
};
if(graph3->vert_degree[v1]==1) {
tmp_edge=graph3->first_edge[v1];
@ -184,7 +184,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_
queue[queue_head++]=tmp_edge;
SETBIT(marked_edge,tmp_edge);
};
};
if(graph3->vert_degree[v2]==1){
tmp_edge=graph3->first_edge[v2];
@ -229,7 +229,7 @@ void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 1) break; //bdz_ph only uses one linear hash function
bdz_ph->hashfunc = *hashptr;
bdz_ph->hashfunc = *hashptr;
++i, ++hashptr;
}
}
@ -251,16 +251,16 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
if (c == 0) c = 1.23; // validating restrictions over parameter c.
DEBUGP("c: %f\n", c);
bdz_ph->m = mph->key_source->nkeys;
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
bdz_ph->m = mph->key_source->nkeys;
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
bdz_ph->n = 3*bdz_ph->r;
bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n);
bdz_ph_alloc_queue(&edges,bdz_ph->m);
DEBUGP("Created hypergraph\n");
DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c);
// Mapping step
@ -287,10 +287,10 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
}
else break;
}
if (iterations == 0)
{
// free(bdz_ph->g);
@ -308,7 +308,7 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
bdz_ph_free_queue(&edges);
bdz_ph_free_graph3(&graph3);
if (mph->verbosity)
{
fprintf(stderr, "Starting optimization step\n");
@ -338,23 +338,23 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
}
#ifdef CMPH_TIMING
#ifdef CMPH_TIMING
register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8;
register cmph_uint32 keys_per_bucket = 1;
construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m);
#endif
#endif
return mphf;
}
static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
{
cmph_uint32 e;
int cycles = 0;
cmph_uint32 hl[3];
bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n);
mph->key_source->rewind(mph->key_source->data);
@ -363,7 +363,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu
cmph_uint32 h0, h1, h2;
cmph_uint32 keylen;
char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen);
mph->key_source->read(mph->key_source->data, &key, &keylen);
hash_vector(bdz_ph->hl, key, keylen, hl);
h0 = hl[0] % bdz_ph->r;
h1 = hl[1] % bdz_ph->r + bdz_ph->r;
@ -371,7 +371,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu
mph->key_source->dispose(mph->key_source->data, key, keylen);
bdz_ph_add_edge(graph3,h0,h1,h2);
}
cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3);
cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3);
return (cycles == 0);
}
@ -383,7 +383,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
cmph_uint32 v0,v1,v2;
cmph_uint8 * marked_vertices =malloc((size_t)(bdz_ph->n >> 3) + 1);
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0);
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
memset(marked_vertices, 0, (size_t)(bdz_ph->n >> 3) + 1);
//memset(bdz_ph->g, 0xff, sizeg);
@ -396,14 +396,14 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
if(!GETBIT(marked_vertices, v0)){
if(!GETBIT(marked_vertices,v1))
{
//SETVALUE(bdz_ph->g, v1, UNASSIGNED);
//SETVALUE(bdz_ph->g, v1, UNASSIGNED);
SETBIT(marked_vertices, v1);
}
if(!GETBIT(marked_vertices,v2))
{
//SETVALUE(bdz_ph->g, v2, UNASSIGNED);
//SETVALUE(bdz_ph->g, v2, UNASSIGNED);
SETBIT(marked_vertices, v2);
}
}
SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3);
SETBIT(marked_vertices, v0);
} else if(!GETBIT(marked_vertices, v1)) {
@ -417,7 +417,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
}else {
SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3);
SETBIT(marked_vertices, v2);
}
}
DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
};
free(marked_vertices);
@ -428,11 +428,11 @@ static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph)
cmph_uint32 i;
cmph_uint8 byte = 0;
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
cmph_uint8 value;
cmph_uint32 idx;
for(i = 0; i < bdz_ph->n; i++)
{
for(i = 0; i < bdz_ph->n; i++)
{
idx = i/5;
byte = new_g[idx];
value = GETVALUE(bdz_ph->g, i);
@ -462,7 +462,7 @@ int bdz_ph_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
sizeg = (cmph_uint32)ceil(data->n/5.0);
sizeg = (cmph_uint32)ceil(data->n/5.0);
nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
#ifdef DEBUG
@ -491,19 +491,19 @@ void bdz_ph_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
bdz_ph->hl = hash_state_load(buf, buflen);
free(buf);
DEBUGP("Reading m and n\n");
nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);
sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
return;
}
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
@ -520,12 +520,12 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
byte0 = bdz_ph->g[hl[0]/5];
byte1 = bdz_ph->g[hl[1]/5];
byte2 = bdz_ph->g[hl[2]/5];
byte0 = lookup_table[hl[0]%5U][byte0];
byte1 = lookup_table[hl[1]%5U][byte1];
byte2 = lookup_table[hl[2]%5U][byte2];
vertex = hl[(byte0 + byte1 + byte2)%3];
return vertex;
}
@ -533,7 +533,7 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bdz_ph_destroy(cmph_t *mphf)
{
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
free(data->g);
free(data->g);
hash_state_destroy(data->hl);
free(data);
free(mphf);
@ -571,17 +571,17 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
{
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
CMPH_HASH hl_type = hash_get_type(data->hl);
CMPH_HASH hl_type = hash_get_type(data->hl);
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
return (cmph_uint32) (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*sizeg);
}
/** cmph_uint32 bdz_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -589,21 +589,21 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
*/
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type);
register cmph_uint32 r = *((cmph_uint32*) ptr);
register cmph_uint8 * g = ptr + 4;
cmph_uint32 hl[3];
register cmph_uint8 byte0, byte1, byte2;
register cmph_uint32 vertex;
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
hl[0] = hl[0] % r;
hl[1] = hl[1] % r + r;
hl[2] = hl[2] % r + (r << 1);
@ -611,11 +611,11 @@ cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
byte0 = g[hl[0]/5];
byte1 = g[hl[1]/5];
byte2 = g[hl[2]/5];
byte0 = lookup_table[hl[0]%5][byte0];
byte1 = lookup_table[hl[1]%5][byte1];
byte2 = lookup_table[hl[2]%5][byte2];
vertex = hl[(byte0 + byte1 + byte2)%3];
return vertex;
}

View File

@ -128,4 +128,3 @@ int main(int argc, char** argv) {
lsmap_destroy(g_created_mphf);
return 0;
}

116
src/bmz.c
View File

@ -24,7 +24,7 @@ bmz_config_data_t *bmz_config_new(void)
{
bmz_config_data_t *bmz = NULL;
bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));
assert(bmz);
if (!bmz) return NULL;
memset(bmz, 0, sizeof(bmz_config_data_t));
bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -49,7 +49,7 @@ void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 2) break; //bmz only uses two hash functions
bmz->hashfuncs[i] = *hashptr;
bmz->hashfuncs[i] = *hashptr;
++i, ++hashptr;
}
}
@ -68,8 +68,8 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
if (c == 0) c = 1.15; // validating restrictions over parameter c.
DEBUGP("c: %f\n", c);
bmz->m = mph->key_source->nkeys;
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
bmz->m = mph->key_source->nkeys;
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
bmz->graph = graph_new(bmz->n, bmz->m);
DEBUGP("Created graph\n");
@ -81,7 +81,7 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
{
// Mapping step
cmph_uint32 biggest_g_value = 0;
cmph_uint32 biggest_edge_value = 1;
cmph_uint32 biggest_edge_value = 1;
iterations = 100;
if (mph->verbosity)
{
@ -109,12 +109,12 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
}
else break;
}
if (iterations == 0)
{
graph_destroy(bmz->graph);
graph_destroy(bmz->graph);
return NULL;
}
// Ordering step
@ -155,17 +155,17 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
}
bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes
}
else
else
{
iterations_map--;
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
}
}
free(used_edges);
free(visited);
} while(restart_mapping && iterations_map > 0);
graph_destroy(bmz->graph);
bmz->graph = NULL;
if (iterations_map == 0)
if (iterations_map == 0)
{
return NULL;
}
@ -212,15 +212,15 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
while(!vqueue_is_empty(q))
{
v = vqueue_remove(q);
it = graph_neighbors_it(bmz->graph, v);
it = graph_neighbors_it(bmz->graph, v);
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
{
{
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
{
collision = 1;
while(collision) // lookahead to resolve collisions
{
next_g = *biggest_g_value + 1;
next_g = *biggest_g_value + 1;
it1 = graph_neighbors_it(bmz->graph, u);
collision = 0;
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -232,7 +232,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
vqueue_destroy(q);
return 1; // restart mapping step.
}
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
{
collision = 1;
break;
@ -240,7 +240,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
}
}
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
}
}
// Marking used edges...
it1 = graph_neighbors_it(bmz->graph, u);
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -254,9 +254,9 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
bmz->g[u] = next_g; // Labelling vertex u.
SETBIT(visited,u);
vqueue_insert(q, u);
}
}
}
}
vqueue_destroy(q);
return 0;
@ -282,22 +282,22 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
while(!vqueue_is_empty(q))
{
v = vqueue_remove(q);
it = graph_neighbors_it(bmz->graph, v);
it = graph_neighbors_it(bmz->graph, v);
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
{
{
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
{
cmph_uint32 next_g_index = 0;
collision = 1;
while(collision) // lookahead to resolve collisions
{
if (next_g_index < nunused_g_values)
if (next_g_index < nunused_g_values)
{
next_g = unused_g_values[next_g_index++];
next_g = unused_g_values[next_g_index++];
}
else
else
{
next_g = *biggest_g_value + 1;
next_g = *biggest_g_value + 1;
next_g_index = UINT_MAX;
}
it1 = graph_neighbors_it(bmz->graph, u);
@ -312,7 +312,7 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
free(unused_g_values);
return 1; // restart mapping step.
}
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
{
collision = 1;
break;
@ -324,13 +324,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
if(nunused_g_values == unused_g_values_capacity)
{
unused_g_values = (cmph_uint32 *)realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32));
unused_g_values_capacity += BUFSIZ;
}
unused_g_values[nunused_g_values++] = next_g;
unused_g_values_capacity += BUFSIZ;
}
unused_g_values[nunused_g_values++] = next_g;
}
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
}
}
next_g_index--;
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
@ -347,13 +347,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
bmz->g[u] = next_g; // Labelling vertex u.
SETBIT(visited, u);
vqueue_insert(q, u);
}
}
}
}
vqueue_destroy(q);
free(unused_g_values);
return 0;
return 0;
}
static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
@ -381,8 +381,8 @@ static void bmz_traverse(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_u
SETBIT(visited, neighbor);
(*unused_edge_index)++;
bmz_traverse(bmz, used_edges, neighbor, unused_edge_index, visited);
}
}
}
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited)
@ -394,7 +394,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
{
v1 = graph_vertex_id(bmz->graph, i, 0);
v2 = graph_vertex_id(bmz->graph, i, 1);
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited);
else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited);
@ -403,7 +403,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
for(i = 0; i < bmz->n; i++)
{
if(!GETBIT(visited,i))
{
{
bmz->g[i] = 0;
SETBIT(visited, i);
bmz_traverse(bmz, used_edges, i, &unused_edge_index, visited);
@ -411,14 +411,14 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
}
}
static int bmz_gen_edges(cmph_config_t *mph)
{
cmph_uint32 e;
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
cmph_uint8 multiple_edges = 0;
DEBUGP("Generating edges for %u vertices\n", bmz->n);
graph_clear_edges(bmz->graph);
graph_clear_edges(bmz->graph);
mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e)
{
@ -426,12 +426,12 @@ static int bmz_gen_edges(cmph_config_t *mph)
cmph_uint32 keylen;
char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen);
h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
if (h1 == h2) if (++h2 >= bmz->n) h2 = 0;
DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
if (h1 == h2)
if (h1 == h2)
{
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -472,7 +472,7 @@ int bmz_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->n), (size_t)1, fd);
#ifdef DEBUG
cmph_uint32 i;
@ -510,8 +510,8 @@ void bmz_load(FILE *f, cmph_t *mphf)
}
DEBUGP("Reading m and n\n");
nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f);
bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n);
nbytes = fread(bmz->g, bmz->n*sizeof(cmph_uint32), (size_t)1, f);
@ -522,7 +522,7 @@ void bmz_load(FILE *f, cmph_t *mphf)
#endif
return;
}
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
@ -537,7 +537,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void bmz_destroy(cmph_t *mphf)
{
bmz_data_t *data = (bmz_data_t *)mphf->data;
free(data->g);
free(data->g);
hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]);
free(data->hashes);
@ -548,7 +548,7 @@ void bmz_destroy(cmph_t *mphf)
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/
void bmz_pack(cmph_t *mphf, void *packed_mphf)
{
@ -579,26 +579,26 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf)
ptr += sizeof(data->n);
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
}
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 bmz_packed_size(cmph_t *mphf)
{
bmz_data_t *data = (bmz_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
}
/** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -613,13 +613,13 @@ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 n = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
register cmph_uint32 n = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
if (h1 == h2 && ++h2 > n) h2 = 0;
return (g_ptr[h1] + g_ptr[h2]);
return (g_ptr[h1] + g_ptr[h2]);
}

View File

@ -23,7 +23,7 @@ bmz8_config_data_t *bmz8_config_new(void)
{
bmz8_config_data_t *bmz8;
bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t));
assert(bmz8);
if (!bmz8) return NULL;
memset(bmz8, 0, sizeof(bmz8_config_data_t));
bmz8->hashfuncs[0] = CMPH_HASH_JENKINS;
bmz8->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -48,7 +48,7 @@ void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 2) break; //bmz8 only uses two hash functions
bmz8->hashfuncs[i] = *hashptr;
bmz8->hashfuncs[i] = *hashptr;
++i, ++hashptr;
}
}
@ -64,7 +64,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
cmph_uint8 restart_mapping = 0;
cmph_uint8 * visited = NULL;
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
if (mph->key_source->nkeys >= 256)
{
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
@ -72,8 +72,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
}
if (c == 0) c = 1.15; // validating restrictions over parameter c.
DEBUGP("c: %f\n", c);
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
bmz8->graph = graph_new(bmz8->n, bmz8->m);
DEBUGP("Created graph\n");
@ -113,8 +113,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
else break;
}
else break;
}
if (iterations == 0)
{
@ -161,19 +161,19 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
}
bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
}
else
else
{
iterations_map--;
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
}
}
free(used_edges);
free(visited);
}while(restart_mapping && iterations_map > 0);
graph_destroy(bmz8->graph);
graph_destroy(bmz8->graph);
bmz8->graph = NULL;
if (iterations_map == 0)
if (iterations_map == 0)
{
return NULL;
}
@ -213,15 +213,15 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
while(!vqueue_is_empty(q))
{
v = vqueue_remove(q);
it = graph_neighbors_it(bmz8->graph, v);
it = graph_neighbors_it(bmz8->graph, v);
while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
{
{
if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
{
collision = 1;
while(collision) // lookahead to resolve collisions
{
next_g = (cmph_uint8)(*biggest_g_value + 1);
next_g = (cmph_uint8)(*biggest_g_value + 1);
it1 = graph_neighbors_it(bmz8->graph, u);
collision = 0;
while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -233,7 +233,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
vqueue_destroy(q);
return 1; // restart mapping step.
}
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
{
collision = 1;
break;
@ -241,7 +241,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
}
}
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
}
}
// Marking used edges...
it1 = graph_neighbors_it(bmz8->graph, u);
while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
@ -250,16 +250,16 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
{
SETBIT(used_edges,(next_g + bmz8->g[lav]));
if(next_g + bmz8->g[lav] > *biggest_edge_value)
if(next_g + bmz8->g[lav] > *biggest_edge_value)
*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
}
}
bmz8->g[u] = next_g; // Labelling vertex u.
SETBIT(visited,u);
vqueue_insert(q, u);
}
}
}
}
vqueue_destroy(q);
return 0;
@ -268,8 +268,8 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
{
cmph_uint8 next_g;
cmph_uint32 u;
cmph_uint32 lav;
cmph_uint32 u;
cmph_uint32 lav;
cmph_uint8 collision;
cmph_uint8 * unused_g_values = NULL;
cmph_uint8 unused_g_values_capacity = 0;
@ -280,27 +280,27 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
DEBUGP("Labelling critical vertices\n");
bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
SETBIT(visited, v);
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2));
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2));
vqueue_insert(q, v);
while(!vqueue_is_empty(q))
{
v = vqueue_remove(q);
it = graph_neighbors_it(bmz8->graph, v);
it = graph_neighbors_it(bmz8->graph, v);
while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
{
{
if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
{
cmph_uint8 next_g_index = 0;
collision = 1;
while(collision) // lookahead to resolve collisions
{
if (next_g_index < nunused_g_values)
if (next_g_index < nunused_g_values)
{
next_g = unused_g_values[next_g_index++];
}
else
else
{
next_g = (cmph_uint8)(*biggest_g_value + 1);
next_g = (cmph_uint8)(*biggest_g_value + 1);
next_g_index = 255;//UINT_MAX;
}
it1 = graph_neighbors_it(bmz8->graph, u);
@ -315,7 +315,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
free(unused_g_values);
return 1; // restart mapping step.
}
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
{
collision = 1;
break;
@ -327,14 +327,14 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
if(nunused_g_values == unused_g_values_capacity)
{
unused_g_values = (cmph_uint8*)realloc(unused_g_values, ((size_t)(unused_g_values_capacity + BUFSIZ))*sizeof(cmph_uint8));
unused_g_values_capacity += (cmph_uint8)BUFSIZ;
}
unused_g_values[nunused_g_values++] = next_g;
unused_g_values_capacity += (cmph_uint8)BUFSIZ;
}
unused_g_values[nunused_g_values++] = next_g;
}
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
}
next_g_index--;
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
@ -345,22 +345,22 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav))
{
SETBIT(used_edges,(next_g + bmz8->g[lav]));
if(next_g + bmz8->g[lav] > *biggest_edge_value)
if(next_g + bmz8->g[lav] > *biggest_edge_value)
*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
}
}
bmz8->g[u] = next_g; // Labelling vertex u.
SETBIT(visited, u);
vqueue_insert(q, u);
}
}
}
}
vqueue_destroy(q);
free(unused_g_values);
return 0;
return 0;
}
static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
@ -388,8 +388,8 @@ static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmp
SETBIT(visited, neighbor);
(*unused_edge_index)++;
bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited);
}
}
}
static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited)
@ -401,7 +401,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
{
v1 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 0);
v2 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 1);
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
if(GETBIT(visited,v1)) bmz8_traverse(bmz8, used_edges, v1, &unused_edge_index, visited);
else bmz8_traverse(bmz8, used_edges, v2, &unused_edge_index, visited);
@ -410,7 +410,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
for(i = 0; i < bmz8->n; i++)
{
if(!GETBIT(visited,i))
{
{
bmz8->g[i] = 0;
SETBIT(visited, i);
bmz8_traverse(bmz8, used_edges, i, &unused_edge_index, visited);
@ -418,14 +418,14 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
}
}
static int bmz8_gen_edges(cmph_config_t *mph)
{
cmph_uint8 e;
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
cmph_uint8 multiple_edges = 0;
DEBUGP("Generating edges for %u vertices\n", bmz8->n);
graph_clear_edges(bmz8->graph);
graph_clear_edges(bmz8->graph);
mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e)
{
@ -433,12 +433,12 @@ static int bmz8_gen_edges(cmph_config_t *mph)
cmph_uint32 keylen;
char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen);
// if (key == NULL)fprintf(stderr, "key = %s -- read BMZ\n", key);
h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);
if (h1 == h2) if (++h2 >= bmz8->n) h2 = 0;
if (h1 == h2)
if (h1 == h2)
{
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -480,7 +480,7 @@ int bmz8_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd);
nbytes = fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd);
/* #ifdef DEBUG
fprintf(stderr, "G: ");
@ -518,8 +518,8 @@ void bmz8_load(FILE *f, cmph_t *mphf)
}
DEBUGP("Reading m and n\n");
nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);
nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);
bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n);
nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f);
@ -530,7 +530,7 @@ void bmz8_load(FILE *f, cmph_t *mphf)
#endif
return;
}
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
@ -556,7 +556,7 @@ void bmz8_destroy(cmph_t *mphf)
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/
void bmz8_pack(cmph_t *mphf, void *packed_mphf)
{
@ -585,26 +585,26 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
*ptr++ = data->n;
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
}
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 bmz8_packed_size(cmph_t *mphf)
{
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
}
/** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -619,14 +619,14 @@ cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4;
register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);
register cmph_uint8 n = *g_ptr++;
register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
register cmph_uint8 n = *g_ptr++;
register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
if (h1 == h2 && ++h2 > n) h2 = 0;
return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
}

190
src/brz.c
View File

@ -26,8 +26,9 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
brz_config_data_t *brz_config_new(void)
{
brz_config_data_t *brz = NULL;
brz_config_data_t *brz = NULL;
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
if (!brz) return NULL;
brz->algo = CMPH_FCH;
brz->b = 128;
brz->hashfuncs[0] = CMPH_HASH_JENKINS;
@ -42,7 +43,7 @@ brz_config_data_t *brz_config_new(void)
brz->memory_availability = 1024*1024;
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)10, sizeof(cmph_uint8));
brz->mphf_fd = NULL;
strcpy((char *)(brz->tmp_dir), "/var/tmp/");
strcpy((char *)(brz->tmp_dir), "/var/tmp/");
assert(brz);
return brz;
}
@ -63,7 +64,7 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 3) break; //brz only uses three hash functions
brz->hashfuncs[i] = *hashptr;
brz->hashfuncs[i] = *hashptr;
++i, ++hashptr;
}
}
@ -84,14 +85,14 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
if(tmp_dir[len-1] != '/')
{
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+2, sizeof(cmph_uint8));
sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir);
sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir);
}
else
{
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+1, sizeof(cmph_uint8));
sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir);
sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir);
}
}
}
@ -105,14 +106,14 @@ void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
if(b <= 64 || b >= 175)
if(b <= 64 || b >= 175)
{
b = 128;
}
brz->b = (cmph_uint8)b;
}
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{
if (algo == CMPH_BMZ8 || algo == CMPH_FCH) // supported algorithms
{
@ -147,13 +148,13 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
DEBUGP("k: %u\n", brz->k);
brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));
// Clustering the keys by graph id.
if (mph->verbosity)
{
fprintf(stderr, "Partioning the set of keys.\n");
fprintf(stderr, "Partioning the set of keys.\n");
}
while(1)
{
int ok;
@ -172,17 +173,17 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
else break;
}
else break;
}
if (iterations == 0)
if (iterations == 0)
{
DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
free(brz->size);
return NULL;
}
DEBUGP("Graphs generated\n");
brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
for (i = 1; i < brz->k; ++i)
{
@ -209,7 +210,7 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
brzf->m = brz->m;
brzf->algo = brz->algo;
mphf->data = brzf;
mphf->size = brz->m;
mphf->size = brz->m;
DEBUGP("Successfully generated minimal perfect hash\n");
if (mph->verbosity)
{
@ -240,7 +241,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_uint32 cur_bucket = 0;
cmph_uint8 nkeys_vd = 0;
cmph_uint8 ** keys_vd = NULL;
mph->key_source->rewind(mph->key_source->data);
DEBUGP("Generating graphs from %u keys\n", brz->m);
// Partitioning
@ -249,7 +250,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
mph->key_source->read(mph->key_source->data, &key, &keylen);
/* Buffers management */
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
{
if(mph->verbosity)
{
@ -265,8 +266,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
sum += value;
value = buckets_size[i];
buckets_size[i] = sum;
}
}
memory_usage = 0;
keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
@ -298,8 +299,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen);
memory_usage += keylen + (cmph_uint32)sizeof(keylen);
h0 = hash(brz->h0, key, keylen) % brz->k;
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
{
free(buffer);
free(buckets_size);
@ -310,8 +311,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
nkeys_in_buffer++;
mph->key_source->dispose(mph->key_source->data, key, keylen);
}
if (memory_usage != 0) // flush buffers
{
if (memory_usage != 0) // flush buffers
{
if(mph->verbosity)
{
fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
@ -370,12 +371,12 @@ static int brz_gen_mphf(cmph_config_t *mph)
nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd);
nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs
nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd);
//tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
buff_manager = buffer_manager_new(brz->memory_availability, nflushes);
buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *));
buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32));
memory_usage = 0;
for(i = 0; i < nflushes; i++)
{
@ -388,7 +389,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)key;
key = NULL; //transfer memory ownership
key = NULL; //transfer memory ownership
}
e = 0;
keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
@ -429,7 +430,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
e++;
buffer_h0[i] = UINT_MAX;
}
if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
{
cmph_io_adapter_t *source = NULL;
@ -444,7 +445,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
//cmph_config_set_algo(config, CMPH_BMZ8);
cmph_config_set_graphsize(config, brz->c);
mphf_tmp = cmph_new(config);
if (mphf_tmp == NULL)
if (mphf_tmp == NULL)
{
if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k);
error = 1;
@ -453,9 +454,9 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_io_byte_vector_adapter_destroy(source);
break;
}
if(mph->verbosity)
if(mph->verbosity)
{
if (cur_bucket % 1000 == 0)
if (cur_bucket % 1000 == 0)
{
fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k);
}
@ -465,7 +466,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
case CMPH_FCH:
{
fch_data_t * fchf = NULL;
fchf = (fch_data_t *)mphf_tmp->data;
fchf = (fch_data_t *)mphf_tmp->data;
bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf);
}
break;
@ -516,7 +517,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
{
cmph_uint32 i = 0;
cmph_uint32 buflenh1 = 0;
cmph_uint32 buflenh2 = 0;
cmph_uint32 buflenh2 = 0;
char * bufh1 = NULL;
char * bufh2 = NULL;
char * buf = NULL;
@ -528,7 +529,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
memcpy(buf, &buflenh1, sizeof(cmph_uint32));
memcpy(buf+sizeof(cmph_uint32), bufh1, (size_t)buflenh1);
memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32));
memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2);
memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2);
for (i = 0; i < n; i++) memcpy(buf+2*sizeof(cmph_uint32)+buflenh1+buflenh2+i,(fchf->g + i), (size_t)1);
free(bufh1);
free(bufh2);
@ -537,7 +538,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
{
cmph_uint32 buflenh1 = 0;
cmph_uint32 buflenh2 = 0;
cmph_uint32 buflenh2 = 0;
char * bufh1 = NULL;
char * bufh2 = NULL;
char * buf = NULL;
@ -572,7 +573,7 @@ int brz_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
free(buf);
// Dumping m and the vector offset.
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->offset, sizeof(cmph_uint32)*(data->k), (size_t)1, fd);
return 1;
}
@ -591,7 +592,7 @@ void brz_load(FILE *f, cmph_t *mphf)
nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo.
nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f);
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
brz->g = (cmph_uint8 **) calloc((size_t)brz->k, sizeof(cmph_uint8 *));
@ -635,7 +636,7 @@ void brz_load(FILE *f, cmph_t *mphf)
brz->h0 = hash_state_load(buf, buflen);
free(buf);
//loading c, m, and the vector offset.
//loading c, m, and the vector offset.
nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f);
brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k);
nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f);
@ -654,9 +655,9 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
register cmph_uint8 mphf_bucket;
if (h1 == h2 && ++h2 >= n) h2 = 0;
mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]);
mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]);
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m);
DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
@ -722,61 +723,61 @@ void brz_destroy(cmph_t *mphf)
/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/
void brz_pack(cmph_t *mphf, void *packed_mphf)
{
brz_data_t *data = (brz_data_t *)mphf->data;
cmph_uint8 * ptr = packed_mphf;
cmph_uint32 i,n;
// packing internal algo type
memcpy(ptr, &(data->algo), sizeof(data->algo));
ptr += sizeof(data->algo);
// packing h0 type
CMPH_HASH h0_type = hash_get_type(data->h0);
CMPH_HASH h0_type = hash_get_type(data->h0);
memcpy(ptr, &h0_type, sizeof(h0_type));
ptr += sizeof(h0_type);
// packing h0
hash_state_pack(data->h0, ptr);
ptr += hash_state_packed_size(h0_type);
// packing k
memcpy(ptr, &(data->k), sizeof(data->k));
ptr += sizeof(data->k);
// packing c
*((cmph_uint64 *)ptr) = (cmph_uint64)data->c;
*((cmph_uint64 *)ptr) = (cmph_uint64)data->c;
ptr += sizeof(data->c);
// packing h1 type
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
memcpy(ptr, &h1_type, sizeof(h1_type));
ptr += sizeof(h1_type);
// packing h2 type
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
memcpy(ptr, &h2_type, sizeof(h2_type));
ptr += sizeof(h2_type);
// packing size
memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
ptr += data->k;
// packing offset
memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
ptr += sizeof(cmph_uint32)*data->k;
#if defined (__ia64) || defined (__x86_64__)
cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr;
#else
cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr;
#endif
cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k);
for(i = 0; i < data->k; i++)
{
#if defined (__ia64) || defined (__x86_64__)
@ -787,7 +788,7 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
// packing h1[i]
hash_state_pack(data->h1[i], g_i);
g_i += hash_state_packed_size(h1_type);
// packing h2[i]
hash_state_pack(data->h2[i], g_i);
g_i += hash_state_packed_size(h2_type);
@ -803,9 +804,9 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
break;
default: assert(0);
}
memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
g_i += n;
}
}
@ -814,16 +815,16 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 brz_packed_size(cmph_t *mphf)
{
cmph_uint32 i;
cmph_uint32 size = 0;
brz_data_t *data = (brz_data_t *)mphf->data;
CMPH_HASH h0_type = hash_get_type(data->h0);
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
CMPH_HASH h0_type = hash_get_type(data->h0);
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
// pointers to g_is
#if defined (__ia64) || defined (__x86_64__)
@ -831,10 +832,10 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
#else
size += (cmph_uint32) sizeof(cmph_uint32)*data->k;
#endif
size += hash_state_packed_size(h1_type) * data->k;
size += hash_state_packed_size(h2_type) * data->k;
cmph_uint32 n = 0;
for(i = 0; i < data->k; i++)
{
@ -848,7 +849,7 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
break;
default: assert(0);
}
size += n;
size += n;
}
return size;
}
@ -859,28 +860,28 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
{
register CMPH_HASH h0_type = *packed_mphf++;
register cmph_uint32 *h0_ptr = packed_mphf;
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
register cmph_uint32 k = *packed_mphf++;
register double c = (double)(*((cmph_uint64*)packed_mphf));
packed_mphf += 2;
register CMPH_HASH h1_type = *packed_mphf++;
register CMPH_HASH h2_type = *packed_mphf++;
register CMPH_HASH h1_type = *packed_mphf++;
register CMPH_HASH h2_type = *packed_mphf++;
register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
packed_mphf = (cmph_uint32 *)(size + k);
packed_mphf = (cmph_uint32 *)(size + k);
register cmph_uint32 * offset = packed_mphf;
packed_mphf += k;
register cmph_uint32 h0;
hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
h0 = fingerprint[2] % k;
register cmph_uint32 m = size[h0];
register cmph_uint32 n = (cmph_uint32)ceil(c * m);
@ -889,69 +890,69 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
#else
register cmph_uint32 * g_is_ptr = packed_mphf;
#endif
register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
register cmph_uint8 mphf_bucket;
if (h1 == h2 && ++h2 >= n) h2 = 0;
mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
return (mphf_bucket + offset[h0]);
return (mphf_bucket + offset[h0]);
}
static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
register CMPH_HASH h0_type = *packed_mphf++;
register cmph_uint32 *h0_ptr = packed_mphf;
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
register cmph_uint32 k = *packed_mphf++;
register double c = (double)(*((cmph_uint64*)packed_mphf));
packed_mphf += 2;
register CMPH_HASH h1_type = *packed_mphf++;
register CMPH_HASH h1_type = *packed_mphf++;
register CMPH_HASH h2_type = *packed_mphf++;
register CMPH_HASH h2_type = *packed_mphf++;
register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
packed_mphf = (cmph_uint32 *)(size + k);
packed_mphf = (cmph_uint32 *)(size + k);
register cmph_uint32 * offset = packed_mphf;
packed_mphf += k;
register cmph_uint32 h0;
hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
h0 = fingerprint[2] % k;
register cmph_uint32 m = size[h0];
register cmph_uint32 b = fch_calc_b(c, m);
register double p1 = fch_calc_p1(m);
register double p2 = fch_calc_p2(b);
#if defined (__ia64) || defined (__x86_64__)
register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf;
#else
register cmph_uint32 * g_is_ptr = packed_mphf;
#endif
register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
@ -962,7 +963,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k
}
/** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -970,7 +971,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k
*/
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
register CMPH_ALGO algo = *ptr++;
cmph_uint32 fingerprint[3];
switch(algo)
@ -982,4 +983,3 @@ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
default: assert(0);
}
}

View File

@ -17,7 +17,7 @@ struct __buffer_entry_t
buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
{
buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t));
assert(buff_entry);
if (!buff_entry) return NULL;
buff_entry->fd = NULL;
buff_entry->buff = NULL;
buff_entry->capacity = capacity;
@ -62,7 +62,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
free(buf);
return NULL;
}
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
{
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
@ -71,7 +71,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
}
memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
buffer_entry->pos += lacked_bytes;
lacked_bytes = *keylen;
copied_bytes = 0;
buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
@ -83,7 +83,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
}
buffer_entry_load(buffer_entry);
}
}
memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
buffer_entry->pos += lacked_bytes;
return buf;
@ -97,7 +97,7 @@ void buffer_entry_destroy(buffer_entry_t * buffer_entry)
buffer_entry->buff = NULL;
buffer_entry->capacity = 0;
buffer_entry->nbytes = 0;
buffer_entry->pos = 0;
buffer_entry->pos = 0;
buffer_entry->eof = 0;
free(buffer_entry);
}

View File

@ -16,7 +16,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
{
cmph_uint32 memory_avail_entry, i;
buffer_manage_t *buff_manage = (buffer_manage_t *)malloc(sizeof(buffer_manage_t));
assert(buff_manage);
if (!buff_manage) return NULL;
buff_manage->memory_avail = memory_avail;
buff_manage->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
buff_manage->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
@ -26,7 +26,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
for(i = 0; i < buff_manage->nentries; i++)
{
buff_manage->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
}
}
return buff_manage;
}
@ -54,7 +54,7 @@ cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32
}
void buffer_manage_destroy(buffer_manage_t * buffer_manage)
{
{
cmph_uint32 i;
for(i = 0; i < buffer_manage->nentries; i++)
{

View File

@ -16,7 +16,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
{
cmph_uint32 memory_avail_entry, i;
buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t));
assert(buff_manager);
if (!buff_manager) return NULL;
buff_manager->memory_avail = memory_avail;
buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
@ -26,7 +26,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
for(i = 0; i < buff_manager->nentries; i++)
{
buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
}
}
return buff_manager;
}
@ -52,7 +52,7 @@ cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uin
}
void buffer_manager_destroy(buffer_manager_t * buffer_manager)
{
{
cmph_uint32 i;
for(i = 0; i < buffer_manager->nentries; i++)
{

View File

@ -18,7 +18,7 @@ chd_config_data_t *chd_config_new(cmph_config_t *mph)
cmph_io_adapter_t *key_source = mph->key_source;
chd_config_data_t *chd;
chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t));
assert(chd);
if (!chd) return NULL;
memset(chd, 0, sizeof(chd_config_data_t));
chd->chd_ph = cmph_config_new(key_source);
@ -69,12 +69,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
chd_config_data_t *chd = (chd_config_data_t *)mph->data;
chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
compressed_rank_t cr;
register cmph_t * chd_phf = NULL;
register cmph_uint32 packed_chd_phf_size = 0;
register cmph_uint32 packed_chd_phf_size = 0;
cmph_uint8 * packed_chd_phf = NULL;
register cmph_uint32 packed_cr_size = 0;
register cmph_uint32 packed_cr_size = 0;
cmph_uint8 * packed_cr = NULL;
register cmph_uint32 i, idx, nkeys, nvals, nbins;
@ -86,24 +86,24 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif
cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
cmph_config_set_graphsize(chd->chd_ph, c);
if (mph->verbosity)
{
fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
}
chd_phf = cmph_new(chd->chd_ph);
if(chd_phf == NULL)
if(chd_phf == NULL)
{
return NULL;
}
packed_chd_phf_size = cmph_packed_size(chd_phf);
packed_chd_phf_size = cmph_packed_size(chd_phf);
DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
/* Make sure that we have enough space to pack the mphf. */
packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
@ -111,8 +111,8 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
cmph_pack(chd_phf, packed_chd_phf);
cmph_destroy(chd_phf);
if (mph->verbosity)
{
fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
@ -121,11 +121,11 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
compressed_rank_init(&cr);
nbins = chd_ph->n;
nkeys = chd_ph->m;
nvals = nbins - nkeys;
nvals = nbins - nkeys;
vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
occup_table = (cmph_uint32 *)chd_ph->occup_table;
for(i = 0, idx = 0; i < nbins; i++)
{
if(!GETBIT32(occup_table, i))
@ -133,10 +133,10 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
vals_table[idx++] = i;
}
}
compressed_rank_generate(&cr, vals_table, nvals);
free(vals_table);
packed_cr_size = compressed_rank_packed_size(&cr);
packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
compressed_rank_pack(&cr, packed_cr);
@ -145,16 +145,16 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
mphf = (cmph_t *)malloc(sizeof(cmph_t));
mphf->algo = mph->algo;
chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
chdf->packed_cr = packed_cr;
packed_cr = NULL; //transfer memory ownership
chdf->packed_chd_phf = packed_chd_phf;
packed_chd_phf = NULL; //transfer memory ownership
chdf->packed_chd_phf_size = packed_chd_phf_size;
chdf->packed_cr_size = packed_cr_size;
mphf->data = chdf;
mphf->size = nkeys;
@ -163,12 +163,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
{
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
}
#ifdef CMPH_TIMING
#ifdef CMPH_TIMING
ELAPSED_TIME_IN_SECONDS(&construction_time);
register cmph_uint32 space_usage = chd_packed_size(mphf)*8;
construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
#endif
#endif
return mphf;
}
@ -196,7 +196,7 @@ int chd_dump(cmph_t *mphf, FILE *fd)
{
register size_t nbytes;
chd_data_t *data = (chd_data_t *)mphf->data;
__cmph_dump(mphf, fd);
// Dumping CHD_PH perfect hash function
@ -207,7 +207,7 @@ int chd_dump(cmph_t *mphf, FILE *fd)
DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", 1);
nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd);
return 1;
}
@ -242,10 +242,10 @@ void chd_pack(cmph_t *mphf, void *packed_mphf)
// packing packed_cr_size and packed_cr
*ptr = data->packed_cr_size;
ptr8 = (cmph_uint8 *) (ptr + 1);
memcpy(ptr8, data->packed_cr, data->packed_cr_size);
ptr8 += data->packed_cr_size;
ptr = (cmph_uint32 *) ptr8;
*ptr = data->packed_chd_phf_size;
@ -268,5 +268,3 @@ cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32);
return _chd_search(packed_chd_phf, ptr, key, keylen);
}

View File

@ -29,7 +29,7 @@ struct _chd_ph_item_t
};
typedef struct _chd_ph_item_t chd_ph_item_t;
// struct to represent the items at mapping phase only.
// struct to represent the items at mapping phase only.
struct _chd_ph_map_item_t
{
cmph_uint32 f;
@ -85,7 +85,7 @@ static cmph_uint8 chd_ph_bucket_insert(chd_ph_bucket_t * buckets,chd_ph_map_item
register chd_ph_map_item_t * tmp_map_item = map_items + item_idx;
register chd_ph_bucket_t * bucket = buckets + tmp_map_item->bucket_num;
tmp_item = items + bucket->items_list;
for(i = 0; i < bucket->size; i++)
{
if(tmp_item->f == tmp_map_item->f && tmp_item->h == tmp_map_item->h)
@ -105,7 +105,7 @@ void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets)
free(buckets);
}
static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
cmph_uint32 *max_bucket_size);
static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items,
@ -131,7 +131,7 @@ static inline double chd_ph_get_entropy(cmph_uint32 * disp_table, cmph_uint32 n,
{
probe_counts[disp_table[i]]++;
};
for(i = 0; i < max_probes; i++)
{
if(probe_counts[i] > 0)
@ -145,9 +145,9 @@ chd_ph_config_data_t *chd_ph_config_new(void)
{
chd_ph_config_data_t *chd_ph;
chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t));
assert(chd_ph);
if (!chd_ph) return NULL;
memset(chd_ph, 0, sizeof(chd_ph_config_data_t));
chd_ph->hashfunc = CMPH_HASH_JENKINS;
chd_ph->cs = NULL;
chd_ph->nbuckets = 0;
@ -159,7 +159,7 @@ chd_ph_config_data_t *chd_ph_config_new(void)
chd_ph->keys_per_bin = 1;
chd_ph->keys_per_bucket = 4;
chd_ph->occup_table = 0;
return chd_ph;
}
@ -184,7 +184,7 @@ void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 1) break; //chd_ph only uses one linear hash function
chd_ph->hashfunc = *hashptr;
chd_ph->hashfunc = *hashptr;
++i, ++hashptr;
}
}
@ -228,24 +228,24 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_
{
mapping_iterations--;
if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);
chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);
chd_ph_bucket_clean(buckets, chd_ph->nbuckets);
mph->key_source->rewind(mph->key_source->data);
mph->key_source->rewind(mph->key_source->data);
for(i = 0; i < chd_ph->m; i++)
{
mph->key_source->read(mph->key_source->data, &key, &keylen);
mph->key_source->read(mph->key_source->data, &key, &keylen);
hash_vector(chd_ph->hl, key, keylen, hl);
map_item = (map_items + i);
g = hl[0] % chd_ph->nbuckets;
map_item->f = hl[1] % chd_ph->n;
map_item->h = hl[2] % (chd_ph->n - 1) + 1;
map_item->bucket_num=g;
mph->key_source->dispose(mph->key_source->data, key, keylen);
mph->key_source->dispose(mph->key_source->data, key, keylen);
// if(buckets[g].size == (chd_ph->keys_per_bucket << 2))
// {
// DEBUGP("BUCKET = %u -- SIZE = %u -- MAXIMUM SIZE = %u\n", g, buckets[g].size, (chd_ph->keys_per_bucket << 2));
@ -275,7 +275,7 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_
free(map_items);
return 1; // SUCCESS
}
if(mapping_iterations == 0)
{
goto error;
@ -292,7 +292,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size)
{
chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(max_bucket_size + 1, sizeof(chd_ph_sorted_list_t));
chd_ph_bucket_t * input_buckets = (*_buckets);
chd_ph_bucket_t * output_buckets;
chd_ph_item_t * input_items = (*_items);
@ -319,7 +319,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
// Store the buckets in a new array which is sorted by bucket sizes
output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero
// non_empty_buckets = nbuckets;
for(i = 0; i < nbuckets; i++)
{
bucket_size = input_buckets[i].size;
@ -338,8 +338,8 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
// Return the buckets sorted in new order and free the old buckets sorted in old order
free(input_buckets);
(*_buckets) = output_buckets;
// Store the items according to the new order of buckets.
output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t));
position = 0;
@ -426,26 +426,26 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph
}
position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
// ([position/32]^=(1<<(position%32));
item++;
i--;
};
};
return 0;
}
}
return 1;
};
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size)
{
register cmph_uint32 probe0_num, probe1_num, probe_num;
probe0_num = 0;
probe1_num = 0;
probe_num = 0;
while(1)
{
if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, bucket_num,size))
@ -469,7 +469,7 @@ static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucke
};
static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 * disp_table)
{
register cmph_uint32 i = 0;
@ -490,8 +490,8 @@ static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_buc
return 1;
};
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 * disp_table)
{
register cmph_uint32 i,j, non_placed_bucket;
@ -516,10 +516,10 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
{
// if bucket is successfully placed remove it from list
if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i))
{
{
disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n;
// DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]);
}
}
else
{
// DEBUGP("BUCKET %u NOT PLACED\n", curr_bucket);
@ -529,7 +529,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
#endif
buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list;
buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id;
#ifdef DEBUG
#ifdef DEBUG
buckets[curr_bucket].items_list=items_list;
buckets[curr_bucket].bucket_id=bucket_id;
#endif
@ -557,7 +557,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
};
cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
cmph_uint32 * disp_table)
{
if(chd_ph->use_h)
@ -582,7 +582,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
memset(chd_ph->occup_table, 0, chd_ph->n);
else
memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size +
sorted_lists[bucket_size].buckets_list; i++)
@ -602,7 +602,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
return 0;
}
(chd_ph->occup_table[position])++;
}
}
else
{
if(GETBIT32(((cmph_uint32*)chd_ph->occup_table), position))
@ -624,7 +624,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
cmph_t *mphf = NULL;
chd_ph_data_t *chd_phf = NULL;
chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
register double load_factor = c;
register cmph_uint8 searching_success = 0;
register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
@ -645,24 +645,24 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
chd_ph->m = mph->key_source->nkeys;
DEBUGP("m = %u\n", chd_ph->m);
chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);
if(load_factor < 0.5 )
{
load_factor = 0.5;
}
if(load_factor >= 0.99)
{
load_factor = 0.99;
}
DEBUGP("load_factor = %.3f\n", load_factor);
chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;
//Round the number of bins to the prime immediately above
if(chd_ph->n % 2 == 0) chd_ph->n++;
for(;;)
@ -670,35 +670,35 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
if(check_primality(chd_ph->n) == 1)
break;
chd_ph->n += 2; // just odd numbers can be primes for n > 2
};
DEBUGP("n = %u \n", chd_ph->n);
if(chd_ph->keys_per_bin == 1)
{
space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
}
if(mph->verbosity)
{
fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
}
// We allocate the working tables
buckets = chd_ph_bucket_new(chd_ph->nbuckets);
buckets = chd_ph_bucket_new(chd_ph->nbuckets);
items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
if(chd_ph->keys_per_bin == 1)
chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
else
chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
//
//
// init_genrand(time(0));
while(1)
{
iterations --;
@ -706,12 +706,12 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
}
if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
{
if (mph->verbosity)
{
fprintf(stderr, "Failure in mapping step\n");
fprintf(stderr, "Failure in mapping step\n");
}
failure = 1;
goto cleanup;
@ -727,15 +727,15 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
}
sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);
if (mph->verbosity)
{
fprintf(stderr, "Starting searching step\n");
}
searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
if(searching_success) break;
// reset occup_table
if(chd_ph->keys_per_bin > 1)
memset(chd_ph->occup_table, 0, chd_ph->n);
@ -757,19 +757,19 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
{
DEBUGP("Error for bin packing generation");
failure = 1;
goto cleanup;
}
}
#endif
if (mph->verbosity)
{
fprintf(stderr, "Starting compressing step\n");
}
if(chd_ph->cs)
{
free(chd_ph->cs);
@ -777,7 +777,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
compressed_seq_init(chd_ph->cs);
compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
#ifdef CMPH_TIMING
ELAPSED_TIME_IN_SECONDS(&construction_time);
register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
@ -785,11 +785,11 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
#endif
cleanup:
chd_ph_bucket_destroy(buckets);
chd_ph_bucket_destroy(buckets);
free(items);
free(sorted_lists);
free(disp_table);
if(failure)
if(failure)
{
if(chd_ph->hl)
{
@ -802,14 +802,14 @@ cleanup:
mphf = (cmph_t *)malloc(sizeof(cmph_t));
mphf->algo = mph->algo;
chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
chd_phf->cs = chd_ph->cs;
chd_ph->cs = NULL; //transfer memory ownership
chd_phf->hl = chd_ph->hl;
chd_ph->hl = NULL; //transfer memory ownership
chd_phf->n = chd_ph->n;
chd_phf->nbuckets = chd_ph->nbuckets;
mphf->data = chd_phf;
mphf->size = chd_ph->n;
@ -818,12 +818,12 @@ cleanup:
{
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
}
#ifdef CMPH_TIMING
#ifdef CMPH_TIMING
register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
construction_time = construction_time - construction_time_begin;
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
#endif
#endif
return mphf;
}
@ -846,19 +846,19 @@ void chd_ph_load(FILE *fd, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, fd);
chd_ph->hl = hash_state_load(buf, buflen);
free(buf);
nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
DEBUGP("Compressed sequence structure has %u bytes\n", buflen);
buf = (char *)malloc((size_t)buflen);
nbytes = fread(buf, (size_t)buflen, (size_t)1, fd);
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
compressed_seq_load(chd_ph->cs, buf, buflen);
free(buf);
// loading n and nbuckets
DEBUGP("Reading n and nbuckets\n");
nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd);
}
int chd_ph_dump(cmph_t *mphf, FILE *fd)
@ -867,7 +867,7 @@ int chd_ph_dump(cmph_t *mphf, FILE *fd)
cmph_uint32 buflen;
register size_t nbytes;
chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
__cmph_dump(mphf, fd);
hash_state_dump(data->hl, &buf, &buflen);
@ -906,11 +906,11 @@ cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
register cmph_uint32 disp,position;
register cmph_uint32 probe0_num,probe1_num;
register cmph_uint32 f,g,h;
hash_vector(chd_ph->hl, key, keylen, hl);
hash_vector(chd_ph->hl, key, keylen, hl);
g = hl[0] % chd_ph->nbuckets;
f = hl[1] % chd_ph->n;
h = hl[2] % (chd_ph->n-1) + 1;
disp = compressed_seq_query(chd_ph->cs, g);
probe0_num = disp % chd_ph->n;
probe1_num = disp/chd_ph->n;
@ -949,10 +949,10 @@ void chd_ph_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 chd_ph_packed_size(cmph_t *mphf)
{
register chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
register CMPH_HASH hl_type = hash_get_type(data->hl);
register CMPH_HASH hl_type = hash_get_type(data->hl);
register cmph_uint32 hash_state_pack_size = hash_state_packed_size(hl_type);
register cmph_uint32 cs_pack_size = compressed_seq_packed_size(data->cs);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_pack_size + cs_pack_size + 3*sizeof(cmph_uint32));
}
@ -961,28 +961,25 @@ cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
{
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
register cmph_uint32 * ptr = (cmph_uint32 *)(hl_ptr + hash_state_packed_size(hl_type));
register cmph_uint32 n = *ptr++;
register cmph_uint32 nbuckets = *ptr++;
cmph_uint32 hl[3];
register cmph_uint32 disp,position;
register cmph_uint32 probe0_num,probe1_num;
register cmph_uint32 f,g,h;
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
g = hl[0] % nbuckets;
f = hl[1] % n;
h = hl[2] % (n-1) + 1;
disp = compressed_seq_query_packed(ptr, g);
probe0_num = disp % n;
probe1_num = disp/n;
position = (cmph_uint32)((f + ((cmph_uint64 )h)*probe0_num + probe1_num) % n);
return position;
}

View File

@ -21,7 +21,7 @@ chm_config_data_t *chm_config_new(void)
{
chm_config_data_t *chm = NULL;
chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
assert(chm);
if (!chm) return NULL;
memset(chm, 0, sizeof(chm_config_data_t));
chm->hashfuncs[0] = CMPH_HASH_JENKINS;
chm->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -45,7 +45,7 @@ void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 2) break; //chm only uses two hash functions
chm->hashfuncs[i] = *hashptr;
chm->hashfuncs[i] = *hashptr;
++i, ++hashptr;
}
}
@ -61,7 +61,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
chm_config_data_t *chm = (chm_config_data_t *)mph->data;
chm->m = mph->key_source->nkeys;
if (c == 0) c = 2.09;
chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
chm->graph = graph_new(chm->n, chm->m);
DEBUGP("Created graph\n");
@ -92,12 +92,12 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
else break;
}
else break;
}
if (iterations == 0)
{
graph_destroy(chm->graph);
graph_destroy(chm->graph);
return NULL;
}
@ -120,7 +120,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
chm_traverse(chm, visited, i);
}
}
graph_destroy(chm->graph);
graph_destroy(chm->graph);
free(visited);
chm->graph = NULL;
@ -149,7 +149,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3
graph_iterator_t it = graph_neighbors_it(chm->graph, v);
cmph_uint32 neighbor = 0;
SETBIT(visited,v);
DEBUGP("Visiting vertex %u\n", v);
while((neighbor = graph_next_neighbor(chm->graph, &it)) != GRAPH_NO_NEIGHBOR)
{
@ -162,7 +162,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3
chm_traverse(chm, visited, neighbor);
}
}
static int chm_gen_edges(cmph_config_t *mph)
{
cmph_uint32 e;
@ -170,7 +170,7 @@ static int chm_gen_edges(cmph_config_t *mph)
int cycles = 0;
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
graph_clear_edges(chm->graph);
graph_clear_edges(chm->graph);
mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e)
{
@ -181,7 +181,7 @@ static int chm_gen_edges(cmph_config_t *mph)
h1 = hash(chm->hashes[0], key, keylen) % chm->n;
h2 = hash(chm->hashes[1], key, keylen) % chm->n;
if (h1 == h2) if (++h2 >= chm->n) h2 = 0;
if (h1 == h2)
if (h1 == h2)
{
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -205,7 +205,7 @@ int chm_dump(cmph_t *mphf, FILE *fd)
cmph_uint32 two = 2; //number of hash functions
chm_data_t *data = (chm_data_t *)mphf->data;
register size_t nbytes;
__cmph_dump(mphf, fd);
nbytes = fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd);
@ -223,7 +223,7 @@ int chm_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
nbytes = fwrite(data->g, sizeof(cmph_uint32)*data->n, (size_t)1, fd);
/* #ifdef DEBUG
fprintf(stderr, "G: ");
@ -260,8 +260,8 @@ void chm_load(FILE *f, cmph_t *mphf)
}
DEBUGP("Reading m and n\n");
nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f);
chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n);
nbytes = fread(chm->g, chm->n*sizeof(cmph_uint32), (size_t)1, f);
@ -272,7 +272,7 @@ void chm_load(FILE *f, cmph_t *mphf)
#endif
return;
}
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
@ -287,7 +287,7 @@ cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void chm_destroy(cmph_t *mphf)
{
chm_data_t *data = (chm_data_t *)mphf->data;
free(data->g);
free(data->g);
hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]);
free(data->hashes);
@ -298,7 +298,7 @@ void chm_destroy(cmph_t *mphf)
/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/
void chm_pack(cmph_t *mphf, void *packed_mphf)
{
@ -332,26 +332,26 @@ void chm_pack(cmph_t *mphf, void *packed_mphf)
ptr += sizeof(data->m);
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
}
/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 chm_packed_size(cmph_t *mphf)
{
chm_data_t *data = (chm_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
}
/** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -366,16 +366,16 @@ cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 n = *g_ptr++;
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
register cmph_uint32 n = *g_ptr++;
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
if (h1 == h2 && ++h2 >= n) h2 = 0;
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
return (g_ptr[h1] + g_ptr[h2]) % m;
return (g_ptr[h1] + g_ptr[h2]) % m;
}

View File

@ -1,10 +1,10 @@
#include "cmph.h"
#include "cmph_structs.h"
#include "chm.h"
#include "bmz.h"
#include "bmz8.h"
#include "brz.h"
#include "fch.h"
#include "bmz.h"
#include "bmz8.h"
#include "brz.h"
#include "fch.h"
#include "bdz.h"
#include "bdz_ph.h"
#include "chd_ph.h"
@ -268,12 +268,12 @@ cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 stru
key_source->read = key_struct_vector_read;
key_source->dispose = key_vector_dispose;
key_source->rewind = key_struct_vector_rewind;
return key_source;
return key_source;
}
void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
cmph_io_struct_vector_destroy(key_source);
cmph_io_struct_vector_destroy(key_source);
}
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
@ -374,7 +374,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
if (mph->algo == CMPH_BRZ)
if (mph->algo == CMPH_BRZ)
{
brz_config_set_tmp_dir(mph, tmp_dir);
}
@ -383,7 +383,7 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{
if (mph->algo == CMPH_BRZ)
if (mph->algo == CMPH_BRZ)
{
brz_config_set_mphf_fd(mph, mphf_fd);
}
@ -391,19 +391,19 @@ void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
if (mph->algo == CMPH_BRZ)
if (mph->algo == CMPH_BRZ)
{
brz_config_set_b(mph, b);
}
else if (mph->algo == CMPH_BDZ)
else if (mph->algo == CMPH_BDZ)
{
bdz_config_set_b(mph, b);
}
else if (mph->algo == CMPH_CHD_PH)
else if (mph->algo == CMPH_CHD_PH)
{
chd_ph_config_set_b(mph, b);
}
else if (mph->algo == CMPH_CHD)
else if (mph->algo == CMPH_CHD)
{
chd_config_set_b(mph, b);
}
@ -411,11 +411,11 @@ void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{
if (mph->algo == CMPH_CHD_PH)
if (mph->algo == CMPH_CHD_PH)
{
chd_ph_config_set_keys_per_bin(mph, keys_per_bin);
}
else if (mph->algo == CMPH_CHD)
else if (mph->algo == CMPH_CHD)
{
chd_config_set_keys_per_bin(mph, keys_per_bin);
}
@ -423,7 +423,7 @@ void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
{
if (mph->algo == CMPH_BRZ)
if (mph->algo == CMPH_BRZ)
{
brz_config_set_memory_availability(mph, memory_availability);
}
@ -523,7 +523,7 @@ cmph_t *cmph_new(cmph_config_t *mph)
double c = mph->c;
DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);
switch (mph->algo)
switch (mph->algo)
{
case CMPH_CHM:
DEBUGP("Creating chm hash\n");
@ -658,28 +658,28 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
case CMPH_CHM:
return chm_search(mphf, key, keylen);
case CMPH_BMZ: /* included -- Fabiano */
DEBUGP("bmz algorithm search\n");
DEBUGP("bmz algorithm search\n");
return bmz_search(mphf, key, keylen);
case CMPH_BMZ8: /* included -- Fabiano */
DEBUGP("bmz8 algorithm search\n");
DEBUGP("bmz8 algorithm search\n");
return bmz8_search(mphf, key, keylen);
case CMPH_BRZ: /* included -- Fabiano */
DEBUGP("brz algorithm search\n");
DEBUGP("brz algorithm search\n");
return brz_search(mphf, key, keylen);
case CMPH_FCH: /* included -- Fabiano */
DEBUGP("fch algorithm search\n");
DEBUGP("fch algorithm search\n");
return fch_search(mphf, key, keylen);
case CMPH_BDZ: /* included -- Fabiano */
DEBUGP("bdz algorithm search\n");
DEBUGP("bdz algorithm search\n");
return bdz_search(mphf, key, keylen);
case CMPH_BDZ_PH: /* included -- Fabiano */
DEBUGP("bdz_ph algorithm search\n");
DEBUGP("bdz_ph algorithm search\n");
return bdz_ph_search(mphf, key, keylen);
case CMPH_CHD_PH: /* included -- Fabiano */
DEBUGP("chd_ph algorithm search\n");
DEBUGP("chd_ph algorithm search\n");
return chd_ph_search(mphf, key, keylen);
case CMPH_CHD: /* included -- Fabiano */
DEBUGP("chd algorithm search\n");
DEBUGP("chd algorithm search\n");
return chd_search(mphf, key, keylen);
default:
assert(0);
@ -692,7 +692,7 @@ cmph_uint32 cmph_size(cmph_t *mphf)
{
return mphf->size;
}
void cmph_destroy(cmph_t *mphf)
{
switch(mphf->algo)
@ -724,7 +724,7 @@ void cmph_destroy(cmph_t *mphf)
case CMPH_CHD: /* included -- Fabiano */
chd_destroy(mphf);
return;
default:
default:
assert(0);
}
assert(0);
@ -735,12 +735,12 @@ void cmph_destroy(cmph_t *mphf)
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/
void cmph_pack(cmph_t *mphf, void *packed_mphf)
{
// packing algorithm type to be used in cmph.c
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
*ptr++ = mphf->algo;
DEBUGP("mphf->algo = %u\n", mphf->algo);
switch(mphf->algo)
@ -772,7 +772,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
case CMPH_CHD: /* included -- Fabiano */
chd_pack(mphf, ptr);
break;
default:
default:
assert(0);
}
return;
@ -782,7 +782,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 cmph_packed_size(cmph_t *mphf)
{
switch(mphf->algo)
@ -805,14 +805,14 @@ cmph_uint32 cmph_packed_size(cmph_t *mphf)
return chd_ph_packed_size(mphf);
case CMPH_CHD: /* included -- Fabiano */
return chd_packed_size(mphf);
default:
default:
assert(0);
}
return 0; // FAILURE
}
/** cmph_uint32 cmph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -842,7 +842,7 @@ cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 k
return chd_ph_search_packed(++ptr, key, keylen);
case CMPH_CHD: /* included -- Fabiano */
return chd_search_packed(++ptr, key, keylen);
default:
default:
assert(0);
}
return 0; // FAILURE

View File

@ -28,7 +28,7 @@ void __cmph_dump(cmph_t *mphf, FILE *fd)
nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd);
nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd);
}
cmph_t *__cmph_load(FILE *f)
cmph_t *__cmph_load(FILE *f)
{
cmph_t *mphf = NULL;
cmph_uint32 i;
@ -36,7 +36,7 @@ cmph_t *__cmph_load(FILE *f)
char *ptr = algo_name;
CMPH_ALGO algo = CMPH_COUNT;
register size_t nbytes;
DEBUGP("Loading mphf\n");
while(1)
{
@ -52,7 +52,7 @@ cmph_t *__cmph_load(FILE *f)
algo = i;
}
}
if (algo == CMPH_COUNT)
if (algo == CMPH_COUNT)
{
DEBUGP("Algorithm %s not found\n", algo_name);
return NULL;
@ -65,5 +65,3 @@ cmph_t *__cmph_load(FILE *f)
return mphf;
}

View File

@ -4,6 +4,7 @@
djb2_state_t *djb2_state_new()
{
djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
if (!djb2_state) return NULL;
state->hashfunc = CMPH_HASH_DJB2;
return state;
}
@ -18,7 +19,7 @@ cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen)
register cmph_uint32 hash = 5381;
const unsigned char *ptr = (unsigned char *)k;
cmph_uint32 i = 0;
while (i < keylen)
while (i < keylen)
{
hash = hash*33 ^ *ptr;
++ptr, ++i;

View File

@ -23,7 +23,7 @@ fch_config_data_t *fch_config_new()
{
fch_config_data_t *fch;
fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t));
assert(fch);
if (!fch) return NULL;
memset(fch, 0, sizeof(fch_config_data_t));
fch->hashfuncs[0] = CMPH_HASH_JENKINS;
fch->hashfuncs[1] = CMPH_HASH_JENKINS;
@ -50,7 +50,7 @@ void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 2) break; //fch only uses two hash functions
fch->hashfuncs[i] = *hashptr;
fch->hashfuncs[i] = *hashptr;
++i, ++hashptr;
}
}
@ -88,36 +88,36 @@ static fch_buckets_t * mapping(cmph_config_t *mph)
fch_buckets_t *buckets = NULL;
fch_config_data_t *fch = (fch_config_data_t *)mph->data;
if (fch->h1) hash_state_destroy(fch->h1);
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
fch->b = fch_calc_b(fch->c, fch->m);
fch->p1 = fch_calc_p1(fch->m);
fch->p2 = fch_calc_p2(fch->b);
//DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2);
buckets = fch_buckets_new(fch->b);
mph->key_source->rewind(mph->key_source->data);
mph->key_source->rewind(mph->key_source->data);
for(i = 0; i < fch->m; i++)
{
cmph_uint32 h1, keylen;
char *key = NULL;
mph->key_source->read(mph->key_source->data, &key, &keylen);
mph->key_source->read(mph->key_source->data, &key, &keylen);
h1 = hash(fch->h1, key, keylen) % fch->m;
h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
fch_buckets_insert(buckets, h1, key, keylen);
key = NULL; // transger memory ownership
}
return buckets;
return buckets;
}
// returns the buckets indexes sorted by their sizes.
// returns the buckets indexes sorted by their sizes.
static cmph_uint32 * ordering(fch_buckets_t * buckets)
{
return fch_buckets_get_indexes_sorted_by_size(buckets);
}
/* Check whether function h2 causes collisions among the keys of each bucket */
/* Check whether function h2 causes collisions among the keys of each bucket */
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes)
{
//cmph_uint32 max_size = fch_buckets_get_max_size(buckets);
@ -146,7 +146,7 @@ static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t
}
static void permut(cmph_uint32 * vector, cmph_uint32 n)
{
{
cmph_uint32 i, j, b;
for (i = 0; i < n; i++) {
j = (cmph_uint32) rand() % n;
@ -179,12 +179,12 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
{
map_table[random_table[i]] = i;
}
do {
do {
if (fch->h2) hash_state_destroy(fch->h2);
fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
filled_count = 0;
if (!restart)
if (!restart)
{
searching_iterations++; iteration_to_generate_h2 = 0;
//DEBUGP("searching_iterations: %u\n", searching_iterations);
@ -192,7 +192,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
else {
iteration_to_generate_h2++;
//DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
}
}
for(i = 0; (i < nbuckets) && !restart; i++) {
cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]);
if (bucketsize == 0)
@ -204,8 +204,8 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
for(z = 0; (z < (fch->m - filled_count)) && restart; z++) {
char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX);
cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX);
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
counter = 0;
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
counter = 0;
restart = 0; // false
fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m;
//DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]);
@ -217,7 +217,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
h2 = hash(fch->h2, key, keylen) % fch->m;
index = (h2 + fch->g[sorted_indexes[i]]) % fch->m;
//DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize);
if (map_table[index] >= filled_count) {
if (map_table[index] >= filled_count) {
cmph_uint32 y = map_table[index];
cmph_uint32 ry = random_table[y];
random_table[y] = random_table[filled_count];
@ -225,19 +225,19 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
map_table[random_table[y]] = y;
map_table[random_table[filled_count]] = filled_count;
filled_count++;
counter ++;
counter ++;
}
else {
else {
restart = 1; // true
filled_count = filled_count - counter;
counter = 0;
counter = 0;
break;
}
j = (j + 1) % bucketsize;
} while(j % bucketsize != INDEX);
} while(j % bucketsize != INDEX);
}
//getchar();
}
}
} while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000));
free(map_table);
free(random_table);
@ -264,7 +264,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c)
fch->h2 = NULL;
fch->g = NULL;
do
{
{
if (mph->verbosity)
{
fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m);
@ -283,7 +283,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c)
}
restart_mapping = searching(fch, buckets, sorted_indexes);
iterations--;
} while(restart_mapping && iterations > 0);
if (buckets) fch_buckets_destroy(buckets);
if (sorted_indexes) free (sorted_indexes);
@ -317,7 +317,7 @@ int fch_dump(cmph_t *mphf, FILE *fd)
char *buf = NULL;
cmph_uint32 buflen;
register size_t nbytes;
fch_data_t *data = (fch_data_t *)mphf->data;
__cmph_dump(mphf, fd);
@ -365,7 +365,7 @@ void fch_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
fch->h1 = hash_state_load(buf, buflen);
free(buf);
//DEBUGP("Loading fch mphf\n");
mphf->data = fch;
//DEBUGP("Reading h2\n");
@ -376,8 +376,8 @@ void fch_load(FILE *f, cmph_t *mphf)
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
fch->h2 = hash_state_load(buf, buflen);
free(buf);
//DEBUGP("Reading m and n\n");
nbytes = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f);
nbytes = fread(&(fch->c), sizeof(double), (size_t)1, f);
@ -418,7 +418,7 @@ void fch_destroy(cmph_t *mphf)
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
* \param mphf pointer to the resulting mphf
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
*/
void fch_pack(cmph_t *mphf, void *packed_mphf)
{
@ -450,37 +450,37 @@ void fch_pack(cmph_t *mphf, void *packed_mphf)
// packing b
*((cmph_uint32 *) ptr) = data->b;
ptr += sizeof(data->b);
// packing p1
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p1;
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p1;
ptr += sizeof(data->p1);
// packing p2
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p2;
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p2;
ptr += sizeof(data->p2);
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
}
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
* \brief Return the amount of space needed to pack mphf.
* \param mphf pointer to a mphf
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 fch_packed_size(cmph_t *mphf)
{
fch_data_t *data = (fch_data_t *)mphf->data;
CMPH_HASH h1_type = hash_get_type(data->h1);
CMPH_HASH h2_type = hash_get_type(data->h2);
CMPH_HASH h1_type = hash_get_type(data->h1);
CMPH_HASH h2_type = hash_get_type(data->h2);
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
4*sizeof(cmph_uint32) + 2*sizeof(double) + sizeof(cmph_uint32)*(data->b));
}
/** cmph_uint32 fch_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
* \brief Use the packed mphf to do a search.
* \brief Use the packed mphf to do a search.
* \param packed_mphf pointer to the packed mphf
* \param key key to be hashed
* \param keylen key legth in bytes
@ -495,12 +495,12 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 b = *g_ptr++;
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 b = *g_ptr++;
register double p1 = (double)(*((cmph_uint64 *)g_ptr));
g_ptr += 2;
@ -508,10 +508,9 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
register double p2 = (double)(*((cmph_uint64 *)g_ptr));
g_ptr += 2;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
h1 = mixh10h11h12 (b, p1, p2, h1);
return (h2 + g_ptr[h1]) % m;
}

View File

@ -20,7 +20,7 @@ typedef struct __fch_bucket_t
static void fch_bucket_new(fch_bucket_t *bucket)
static void fch_bucket_new(fch_bucket_t *bucket)
{
assert(bucket);
bucket->size = 0;
@ -109,16 +109,16 @@ struct __fch_buckets_t
{
fch_bucket_t * values;
cmph_uint32 nbuckets, max_size;
};
fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets)
{
cmph_uint32 i;
fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t));
assert(buckets);
if (!buckets) return NULL;
buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t));
for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
assert(buckets->values);
buckets->nbuckets = nbuckets;
buckets->max_size = 0;
@ -135,7 +135,7 @@ void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key,
{
assert(index < buckets->nbuckets);
fch_bucket_insert(buckets->values + index, key, length);
if (fch_bucket_size(buckets->values + index) > buckets->max_size)
if (fch_bucket_size(buckets->values + index) > buckets->max_size)
{
buckets->max_size = fch_bucket_size(buckets->values + index);
}
@ -170,16 +170,16 @@ cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets)
return buckets->nbuckets;
}
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
{
cmph_uint32 i = 0;
cmph_uint32 sum = 0, value;
cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32));
cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32));
// collect how many buckets for each size.
for(i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
// calculating offset considering a decreasing order of buckets size.
value = nbuckets_size[buckets->max_size];
nbuckets_size[buckets->max_size] = sum;
@ -188,13 +188,13 @@ cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
sum += value;
value = nbuckets_size[i];
nbuckets_size[i] = sum;
}
for(i = 0; i < buckets->nbuckets; i++)
for(i = 0; i < buckets->nbuckets; i++)
{
sorted_indexes[nbuckets_size[fch_bucket_size(buckets->values + i)]] = (cmph_uint32)i;
nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
}
}
free(nbuckets_size);
return sorted_indexes;
}
@ -208,7 +208,7 @@ void fch_buckets_print(fch_buckets_t * buckets)
void fch_buckets_destroy(fch_buckets_t * buckets)
{
cmph_uint32 i;
for (i = 0; i < buckets->nbuckets; i++) fch_bucket_destroy(buckets->values + i);
for (i = 0; i < buckets->nbuckets; i++) fch_bucket_destroy(buckets->values + i);
free(buckets->values);
free(buckets);
}

View File

@ -4,6 +4,7 @@
fnv_state_t *fnv_state_new()
{
fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
if (!state) return NULL;
state->hashfunc = CMPH_HASH_FNV;
return state;
}
@ -15,13 +16,13 @@ void fnv_state_destroy(fnv_state_t *state)
cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen)
{
const unsigned char *bp = (const unsigned char *)k;
const unsigned char *be = bp + keylen;
static unsigned int hval = 0;
const unsigned char *bp = (const unsigned char *)k;
const unsigned char *be = bp + keylen;
static unsigned int hval = 0;
while (bp < be)
while (bp < be)
{
//hval *= 0x01000193; good for non-gcc compiler
hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); //good for gcc
@ -41,6 +42,7 @@ void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen)
fnv_state_t * fnv_state_copy(fnv_state_t *src_state)
{
fnv_state_t *dest_state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
if (!dest_state) return NULL;
dest_state->hashfunc = src_state->hashfunc;
return dest_state;
}

View File

@ -77,7 +77,7 @@ void graph_print(graph_t *g)
printf("%u -> %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
}
}
}
return;
}
@ -130,7 +130,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
DEBUGP("Deleting edge point %u %u\n", v1, v2);
e = g->first[v1];
if (check_edge(g, e, v1, v2))
if (check_edge(g, e, v1, v2))
{
g->first[v1] = g->next[e];
//g->edges[e] = EMPTY;
@ -151,7 +151,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
DEBUGP("Deleted\n");
}
void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
{
g->shrinking = 1;
@ -163,7 +163,7 @@ void graph_clear_edges(graph_t *g)
{
cmph_uint32 i;
for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY;
for (i = 0; i < g->nedges*2; ++i)
for (i = 0; i < g->nedges*2; ++i)
{
g->edges[i] = EMPTY;
g->next[i] = EMPTY;
@ -178,7 +178,7 @@ static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *delet
cmph_uint8 found = 0;
DEBUGP("Checking degree of vertex %u connected to edge %u\n", v, edge);
if (edge == EMPTY) return 0;
else if (!(GETBIT(deleted, abs_edge(edge, 0))))
else if (!(GETBIT(deleted, abs_edge(edge, 0))))
{
found = 1;
*e = edge;
@ -206,17 +206,17 @@ static void cyclic_del_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted)
degree1 = find_degree1_edge(g, v1, deleted, &e);
if (!degree1) return;
while(1)
while(1)
{
DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
SETBIT(deleted, abs_edge(e, 0));
v2 = g->edges[abs_edge(e, 0)];
if (v2 == v1) v2 = g->edges[abs_edge(e, 1)];
DEBUGP("Checking if second endpoint %u has degree 1\n", v2);
DEBUGP("Checking if second endpoint %u has degree 1\n", v2);
degree1 = find_degree1_edge(g, v2, deleted, &e);
if (degree1)
if (degree1)
{
DEBUGP("Inspecting vertex %u\n", v2);
v1 = v2;
@ -240,7 +240,7 @@ int graph_is_cyclic(graph_t *g)
}
for (i = 0; i < g->nedges; ++i)
{
if (!(GETBIT(deleted, i)))
if (!(GETBIT(deleted, i)))
{
DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]);
free(deleted);
@ -275,15 +275,15 @@ void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
for (i = 0; i < g->nedges; ++i)
{
if (!(GETBIT(deleted,i)))
if (!(GETBIT(deleted,i)))
{
DEBUGP("Edge %u %u->%u belongs to the 2-core\n", i, g->edges[i], g->edges[i + g->nedges]);
if(!(GETBIT(g->critical_nodes,g->edges[i])))
if(!(GETBIT(g->critical_nodes,g->edges[i])))
{
g->ncritical_nodes ++;
SETBIT(g->critical_nodes,g->edges[i]);
}
if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges])))
if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges])))
{
g->ncritical_nodes ++;
SETBIT(g->critical_nodes,g->edges[i + g->nedges]);
@ -328,11 +328,9 @@ graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v)
cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
{
cmph_uint32 ret;
if(it->edge == EMPTY) return GRAPH_NO_NEIGHBOR;
if(it->edge == EMPTY) return GRAPH_NO_NEIGHBOR;
if (g->edges[it->edge] == it->vertex) ret = g->edges[it->edge + g->nedges];
else ret = g->edges[it->edge];
it->edge = g->next[it->edge];
return ret;
}

View File

@ -133,7 +133,7 @@ void hash_state_destroy(hash_state_t *state)
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* \param state points to the hash function
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
*
*
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* However, the hash function type must be packed outside.
*/
@ -142,20 +142,20 @@ void hash_state_pack(hash_state_t *state, void *hash_packed)
switch (state->hashfunc)
{
case CMPH_HASH_JENKINS:
// pack the jenkins hash function
// pack the jenkins hash function
jenkins_state_pack((jenkins_state_t *)state, hash_packed);
break;
default:
assert(0);
}
return;
return;
}
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
* \brief Return the amount of space needed to pack a hash function.
* \param hashfunc function type
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
{
cmph_uint32 size = 0;
@ -197,7 +197,7 @@ cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cm
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
*/
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
{
{
switch (hashfunc)
{
case CMPH_HASH_JENKINS:

View File

@ -41,7 +41,7 @@ void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
while(*hashptr != CMPH_HASH_COUNT)
{
if (i >= 3) break; //hashtree only uses three hash functions
hashtree->hashfuncs[i] = *hashptr;
hashtree->hashfuncs[i] = *hashptr;
++i, ++hashptr;
}
}
@ -55,8 +55,8 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
cmph_uint32 iterations = 20;
cmph_uint8 *visited = NULL;
hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
hashtree->m = mph->key_source->nkeys;
hashtree->n = ceil(c * mph->key_source->nkeys);
hashtree->m = mph->key_source->nkeys;
hashtree->n = ceil(c * mph->key_source->nkeys);
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c);
hashtree->graph = graph_new(hashtree->n, hashtree->m);
DEBUGP("Created graph\n");
@ -87,12 +87,12 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
}
if (iterations == 0) break;
}
else break;
}
else break;
}
if (iterations == 0)
{
graph_destroy(hashtree->graph);
graph_destroy(hashtree->graph);
return NULL;
}
@ -115,7 +115,7 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
hashtree_traverse(hashtree, visited, i);
}
}
graph_destroy(hashtree->graph);
graph_destroy(hashtree->graph);
free(visited);
hashtree->graph = NULL;
@ -144,7 +144,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi
graph_iterator_t it = graph_neighbors_it(hashtree->graph, v);
cmph_uint32 neighbor = 0;
SETBIT(visited,v);
DEBUGP("Visiting vertex %u\n", v);
while((neighbor = graph_next_neighbor(hashtree->graph, &it)) != GRAPH_NO_NEIGHBOR)
{
@ -157,7 +157,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi
hashtree_traverse(hashtree, visited, neighbor);
}
}
static int hashtree_gen_edges(cmph_config_t *mph)
{
cmph_uint32 e;
@ -165,7 +165,7 @@ static int hashtree_gen_edges(cmph_config_t *mph)
int cycles = 0;
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", hashtree->n, cmph_hash_names[hashtree->hashfuncs[0]], cmph_hash_names[hashtree->hashfuncs[1]]);
graph_clear_edges(hashtree->graph);
graph_clear_edges(hashtree->graph);
mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e)
{
@ -176,7 +176,7 @@ static int hashtree_gen_edges(cmph_config_t *mph)
h1 = hash(hashtree->hashes[0], key, keylen) % hashtree->n;
h2 = hash(hashtree->hashes[1], key, keylen) % hashtree->n;
if (h1 == h2) if (++h2 >= hashtree->n) h2 = 0;
if (h1 == h2)
if (h1 == h2)
{
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen);
@ -216,7 +216,7 @@ int hashtree_dump(cmph_t *mphf, FILE *fd)
fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd);
#ifdef DEBUG
fprintf(stderr, "G: ");
@ -253,8 +253,8 @@ void hashtree_load(FILE *f, cmph_t *mphf)
}
DEBUGP("Reading m and n\n");
fread(&(hashtree->n), sizeof(cmph_uint32), 1, f);
fread(&(hashtree->m), sizeof(cmph_uint32), 1, f);
fread(&(hashtree->n), sizeof(cmph_uint32), 1, f);
fread(&(hashtree->m), sizeof(cmph_uint32), 1, f);
hashtree->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*hashtree->n);
fread(hashtree->g, hashtree->n*sizeof(cmph_uint32), 1, f);
@ -265,7 +265,7 @@ void hashtree_load(FILE *f, cmph_t *mphf)
#endif
return;
}
cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
@ -280,7 +280,7 @@ cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
void hashtree_destroy(cmph_t *mphf)
{
hashtree_data_t *data = (hashtree_data_t *)mphf->data;
free(data->g);
free(data->g);
hash_state_destroy(data->hashes[0]);
hash_state_destroy(data->hashes[1]);
free(data->hashes);

View File

@ -28,16 +28,16 @@
have at least 1/4 probability of changing.
* If mix() is run forward, every bit of c will change between 1/3 and
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
mix() was built out of 36 single-cycle latency instructions in a
mix() was built out of 36 single-cycle latency instructions in a
structure that could supported 2x parallelism, like so:
a -= b;
a -= b;
a -= c; x = (c>>13);
b -= c; a ^= x;
b -= a; x = (a<<8);
c -= a; b ^= x;
c -= b; x = (b>>13);
...
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
of that parallelism. They've also turned some of those single-cycle
latency instructions into multi-cycle latency instructions. Still,
this is the fastest good hash I could find. There were about 2^^68
@ -87,6 +87,7 @@ acceptable. Do NOT use for cryptographic purposes.
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
{
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
if (!state) return NULL;
DEBUGP("Initializing jenkins hash\n");
state->seed = ((cmph_uint32)rand() % size);
return state;
@ -121,28 +122,28 @@ static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_u
hashes[2] += length;
switch(len) /* all the case statements fall through */
{
case 11:
case 11:
hashes[2] +=((cmph_uint32)k[10]<<24);
case 10:
case 10:
hashes[2] +=((cmph_uint32)k[9]<<16);
case 9 :
case 9 :
hashes[2] +=((cmph_uint32)k[8]<<8);
/* the first byte of hashes[2] is reserved for the length */
case 8 :
case 8 :
hashes[1] +=((cmph_uint32)k[7]<<24);
case 7 :
case 7 :
hashes[1] +=((cmph_uint32)k[6]<<16);
case 6 :
case 6 :
hashes[1] +=((cmph_uint32)k[5]<<8);
case 5 :
hashes[1] +=(cmph_uint8) k[4];
case 4 :
case 4 :
hashes[0] +=((cmph_uint32)k[3]<<24);
case 3 :
case 3 :
hashes[0] +=((cmph_uint32)k[2]<<16);
case 2 :
case 2 :
hashes[0] +=((cmph_uint32)k[1]<<8);
case 1 :
case 1 :
hashes[0] +=(cmph_uint8)k[0];
/* case 0: nothing left to add */
}
@ -158,13 +159,13 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
/* cmph_uint32 a, b, c;
cmph_uint32 len, length;
// Set up the internal state
// Set up the internal state
length = keylen;
len = length;
a = b = 0x9e3779b9; // the golden ratio; an arbitrary value
c = state->seed; // the previous hash value - seed in our case
a = b = 0x9e3779b9; // the golden ratio; an arbitrary value
c = state->seed; // the previous hash value - seed in our case
// handle most of the key
// handle most of the key
while (len >= 12)
{
a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
@ -176,37 +177,37 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
// handle the last 11 bytes
c += length;
switch(len) /// all the case statements fall through
switch(len) /// all the case statements fall through
{
case 11:
case 11:
c +=((cmph_uint32)k[10]<<24);
case 10:
case 10:
c +=((cmph_uint32)k[9]<<16);
case 9 :
case 9 :
c +=((cmph_uint32)k[8]<<8);
// the first byte of c is reserved for the length
case 8 :
// the first byte of c is reserved for the length
case 8 :
b +=((cmph_uint32)k[7]<<24);
case 7 :
case 7 :
b +=((cmph_uint32)k[6]<<16);
case 6 :
case 6 :
b +=((cmph_uint32)k[5]<<8);
case 5 :
case 5 :
b +=k[4];
case 4 :
case 4 :
a +=((cmph_uint32)k[3]<<24);
case 3 :
case 3 :
a +=((cmph_uint32)k[2]<<16);
case 2 :
case 2 :
a +=((cmph_uint32)k[1]<<8);
case 1 :
case 1 :
a +=k[0];
// case 0: nothing left to add
// case 0: nothing left to add
}
mix(a,b,c);
/// report the result
/// report the result
return c;
*/
@ -221,7 +222,7 @@ void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
{
*buflen = sizeof(cmph_uint32);
*buf = (char *)malloc(sizeof(cmph_uint32));
if (!*buf)
if (!*buf)
{
*buflen = UINT_MAX;
return;
@ -252,7 +253,7 @@ jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
* \param state points to the jenkins function
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
*/
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
{
@ -265,7 +266,7 @@ void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
/** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state);
* \brief Return the amount of space needed to pack a jenkins function.
* \return the size of the packed function or zero for failures
*/
*/
cmph_uint32 jenkins_state_packed_size(void)
{
return sizeof(cmph_uint32);

View File

@ -12,6 +12,7 @@ struct __linear_string_map_t {
lsmap_t *lsmap_new() {
lsmap_t* lsmap = (lsmap_t*)malloc(sizeof(lsmap_t));
if (!lsmap) return NULL;
lsmap->key = "dummy node";
lsmap->next = NULL;
return lsmap;
@ -42,7 +43,7 @@ void* lsmap_search(lsmap_t *lsmap, const char *key) {
}
return NULL;
}
void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) {
while (lsmap->next != NULL) {
f(lsmap->key);
@ -65,4 +66,3 @@ void lsmap_destroy(lsmap_t *lsmap) {
}
free(lsmap);
}

View File

@ -22,13 +22,13 @@
void usage(const char *prg)
{
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
}
void usage_long(const char *prg)
{
cmph_uint32 i;
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, " -h\t print this help message\n");
fprintf(stderr, " -c\t c value determines:\n");
fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n");
@ -57,7 +57,7 @@ void usage_long(const char *prg)
fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n");
fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n");
fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n");
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n");
fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n");
fprintf(stderr, " \t range [1,128]. Defaul is 1\n");
@ -182,7 +182,7 @@ int main(int argc, char **argv)
break;
}
}
if (!valid)
if (!valid)
{
fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
return -1;
@ -204,7 +204,7 @@ int main(int argc, char **argv)
break;
}
}
if (!valid)
if (!valid)
{
fprintf(stderr, "Invalid hash function: %s\n", optarg);
return -1;
@ -223,7 +223,7 @@ int main(int argc, char **argv)
return 1;
}
keys_file = argv[optind];
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
srand(seed);
int ret = 0;
@ -232,7 +232,7 @@ int main(int argc, char **argv)
mphf_file = (char *)malloc(strlen(keys_file) + 5);
memcpy(mphf_file, keys_file, strlen(keys_file));
memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
}
}
keys_fd = fopen(keys_file, "r");
@ -258,7 +258,7 @@ int main(int argc, char **argv)
cmph_config_set_memory_availability(config, memory_availability);
cmph_config_set_b(config, b);
cmph_config_set_keys_per_bin(config, keys_per_bin);
//if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c);
@ -279,8 +279,8 @@ int main(int argc, char **argv)
free(mphf_file);
return -1;
}
cmph_dump(mphf, mphf_fd);
cmph_destroy(mphf);
cmph_dump(mphf, mphf_fd);
cmph_destroy(mphf);
fclose(mphf_fd);
}
else
@ -329,7 +329,7 @@ int main(int argc, char **argv)
}
source->dispose(source->data, buf, buflen);
}
cmph_destroy(mphf);
free(hashtable);
}
@ -338,5 +338,5 @@ int main(int argc, char **argv)
free(tmp_dir);
cmph_io_nlfile_adapter_destroy(source);
return ret;
}

View File

@ -4,6 +4,7 @@
sdbm_state_t *sdbm_state_new()
{
sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
if (!state) return NULL;
state->hashfunc = CMPH_HASH_SDBM;
return state;
}

View File

@ -12,7 +12,7 @@ vqueue_t * vqueue_new(cmph_uint32 capacity)
{
size_t capacity_plus_one = capacity + 1;
vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
assert(q);
if (!q) return NULL;
q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32));
q->beg = q->end = 0;
q->capacity = (cmph_uint32) capacity_plus_one;
@ -43,7 +43,7 @@ void vqueue_print(vqueue_t * q)
cmph_uint32 i;
for (i = q->beg; i != q->end; i = (i + 1)%q->capacity)
fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]);
}
}
void vqueue_destroy(vqueue_t *q)
{

View File

@ -76,4 +76,3 @@ void vstack_reserve(vstack_t *stack, cmph_uint32 size)
DEBUGP("Increased\n");
}
}