Merge branch 'master' of ssh://cmph.git.sourceforge.net/gitroot/cmph/cmph
This commit is contained in:
commit
2bfe38d2da
80
src/bdz.c
80
src/bdz.c
@ -35,9 +35,9 @@ const cmph_uint8 bdz_lookup_table[] =
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
|
||||
};
|
||||
};
|
||||
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
cmph_uint32 vertices[3];
|
||||
cmph_uint32 next_edges[3];
|
||||
@ -54,12 +54,12 @@ static void bdz_free_queue(bdz_queue_t * queue)
|
||||
free(*queue);
|
||||
};
|
||||
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
cmph_uint32 nedges;
|
||||
bdz_edge_t * edges;
|
||||
cmph_uint32 * first_edge;
|
||||
cmph_uint8 * vert_degree;
|
||||
cmph_uint8 * vert_degree;
|
||||
}bdz_graph3_t;
|
||||
|
||||
|
||||
@ -67,7 +67,7 @@ static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uin
|
||||
{
|
||||
graph3->edges=malloc(nedges*sizeof(bdz_edge_t));
|
||||
graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
|
||||
graph3->vert_degree=malloc((size_t)nvertices);
|
||||
graph3->vert_degree=malloc((size_t)nvertices);
|
||||
};
|
||||
static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
|
||||
{
|
||||
@ -136,7 +136,7 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
|
||||
j=0;
|
||||
} else if(graph3->edges[edge1].vertices[1]==vert){
|
||||
j=1;
|
||||
} else
|
||||
} else
|
||||
j=2;
|
||||
edge1=graph3->edges[edge1].next_edges[j];
|
||||
};
|
||||
@ -145,16 +145,16 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
|
||||
bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
|
||||
exit(-1);
|
||||
};
|
||||
|
||||
|
||||
if(edge2!=NULL_EDGE){
|
||||
graph3->edges[edge2].next_edges[j] =
|
||||
graph3->edges[edge2].next_edges[j] =
|
||||
graph3->edges[edge1].next_edges[i];
|
||||
} else
|
||||
} else
|
||||
graph3->first_edge[vert]=
|
||||
graph3->edges[edge1].next_edges[i];
|
||||
graph3->vert_degree[vert]--;
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
|
||||
@ -170,7 +170,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
|
||||
v0=graph3->edges[i].vertices[0];
|
||||
v1=graph3->edges[i].vertices[1];
|
||||
v2=graph3->edges[i].vertices[2];
|
||||
if(graph3->vert_degree[v0]==1 ||
|
||||
if(graph3->vert_degree[v0]==1 ||
|
||||
graph3->vert_degree[v1]==1 ||
|
||||
graph3->vert_degree[v2]==1){
|
||||
if(!GETBIT(marked_edge,i)) {
|
||||
@ -196,7 +196,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
|
||||
queue[queue_head++]=tmp_edge;
|
||||
SETBIT(marked_edge,tmp_edge);
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
if(graph3->vert_degree[v1]==1) {
|
||||
tmp_edge=graph3->first_edge[v1];
|
||||
@ -204,7 +204,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
|
||||
queue[queue_head++]=tmp_edge;
|
||||
SETBIT(marked_edge,tmp_edge);
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
if(graph3->vert_degree[v2]==1){
|
||||
tmp_edge=graph3->first_edge[v2];
|
||||
@ -227,7 +227,7 @@ bdz_config_data_t *bdz_config_new(void)
|
||||
{
|
||||
bdz_config_data_t *bdz;
|
||||
bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
|
||||
assert(bdz);
|
||||
if (!bdz) return NULL;
|
||||
memset(bdz, 0, sizeof(bdz_config_data_t));
|
||||
bdz->hashfunc = CMPH_HASH_JENKINS;
|
||||
bdz->g = NULL;
|
||||
@ -328,10 +328,10 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
|
||||
if (iterations == 0)
|
||||
{
|
||||
bdz_free_queue(&edges);
|
||||
@ -353,7 +353,7 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
|
||||
}
|
||||
ranking(bdz);
|
||||
#ifdef CMPH_TIMING
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time);
|
||||
#endif
|
||||
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||
@ -381,17 +381,17 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
||||
}
|
||||
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
#ifdef CMPH_TIMING
|
||||
register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
|
||||
register cmph_uint32 keys_per_bucket = 1;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
|
||||
{
|
||||
cmph_uint32 e;
|
||||
@ -405,7 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
|
||||
cmph_uint32 h0, h1, h2;
|
||||
cmph_uint32 keylen;
|
||||
char *key = NULL;
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
hash_vector(bdz->hl, key, keylen,hl);
|
||||
h0 = hl[0] % bdz->r;
|
||||
h1 = hl[1] % bdz->r + bdz->r;
|
||||
@ -414,7 +414,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
bdz_add_edge(graph3,h0,h1,h2);
|
||||
}
|
||||
cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);
|
||||
cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);
|
||||
return (cycles == 0);
|
||||
}
|
||||
|
||||
@ -426,7 +426,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
|
||||
cmph_uint32 v0,v1,v2;
|
||||
cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1);
|
||||
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
|
||||
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
|
||||
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
|
||||
memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1);
|
||||
memset(bdz->g, 0xff, (size_t)(sizeg));
|
||||
|
||||
@ -439,12 +439,12 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
|
||||
if(!GETBIT(marked_vertices, v0)){
|
||||
if(!GETBIT(marked_vertices,v1))
|
||||
{
|
||||
SETVALUE1(bdz->g, v1, UNASSIGNED);
|
||||
SETVALUE1(bdz->g, v1, UNASSIGNED);
|
||||
SETBIT(marked_vertices, v1);
|
||||
}
|
||||
if(!GETBIT(marked_vertices,v2))
|
||||
{
|
||||
SETVALUE1(bdz->g, v2, UNASSIGNED);
|
||||
SETVALUE1(bdz->g, v2, UNASSIGNED);
|
||||
SETBIT(marked_vertices, v2);
|
||||
}
|
||||
SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
|
||||
@ -507,7 +507,7 @@ int bdz_dump(cmph_t *mphf, FILE *fd)
|
||||
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
|
||||
|
||||
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0);
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
|
||||
|
||||
@ -541,12 +541,12 @@ void bdz_load(FILE *f, cmph_t *mphf)
|
||||
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
|
||||
bdz->hl = hash_state_load(buf, buflen);
|
||||
free(buf);
|
||||
|
||||
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
|
||||
sizeg = (cmph_uint32)ceil(bdz->n/4.0);
|
||||
bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
|
||||
nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
|
||||
@ -566,7 +566,7 @@ void bdz_load(FILE *f, cmph_t *mphf)
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
|
||||
{
|
||||
@ -578,17 +578,17 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint
|
||||
while(beg_idx_b < end_idx_b)
|
||||
{
|
||||
base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
|
||||
|
||||
|
||||
}
|
||||
DEBUGP("base rank %u\n", base_rank);
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
DEBUGP("beg_idx_v %u\n", beg_idx_v);
|
||||
while(beg_idx_v < vertex)
|
||||
while(beg_idx_v < vertex)
|
||||
{
|
||||
if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
|
||||
beg_idx_v++;
|
||||
}
|
||||
|
||||
|
||||
return base_rank;
|
||||
}
|
||||
|
||||
@ -610,7 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
void bdz_destroy(cmph_t *mphf)
|
||||
{
|
||||
bdz_data_t *data = (bdz_data_t *)mphf->data;
|
||||
free(data->g);
|
||||
free(data->g);
|
||||
hash_state_destroy(data->hl);
|
||||
free(data->ranktable);
|
||||
free(data);
|
||||
@ -660,18 +660,18 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 bdz_packed_size(cmph_t *mphf)
|
||||
{
|
||||
bdz_data_t *data = (bdz_data_t *)mphf->data;
|
||||
|
||||
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0)));
|
||||
}
|
||||
|
||||
/** cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key length in bytes
|
||||
@ -679,13 +679,13 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf)
|
||||
*/
|
||||
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
|
||||
|
||||
register cmph_uint32 vertex;
|
||||
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
|
||||
|
||||
register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
|
||||
|
||||
|
||||
register cmph_uint32 r = *ranktable++;
|
||||
register cmph_uint32 ranktablesize = *ranktable++;
|
||||
register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
|
||||
|
106
src/bdz_ph.c
106
src/bdz_ph.c
@ -24,7 +24,7 @@ static cmph_uint8 lookup_table[5][256] = {
|
||||
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
};
|
||||
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
cmph_uint32 vertices[3];
|
||||
cmph_uint32 next_edges[3];
|
||||
@ -41,12 +41,12 @@ static void bdz_ph_free_queue(bdz_ph_queue_t * queue)
|
||||
free(*queue);
|
||||
};
|
||||
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
cmph_uint32 nedges;
|
||||
bdz_ph_edge_t * edges;
|
||||
cmph_uint32 * first_edge;
|
||||
cmph_uint8 * vert_degree;
|
||||
cmph_uint8 * vert_degree;
|
||||
}bdz_ph_graph3_t;
|
||||
|
||||
|
||||
@ -54,7 +54,7 @@ static void bdz_ph_alloc_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cm
|
||||
{
|
||||
graph3->edges=malloc(nedges*sizeof(bdz_ph_edge_t));
|
||||
graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
|
||||
graph3->vert_degree=malloc((size_t)nvertices);
|
||||
graph3->vert_degree=malloc((size_t)nvertices);
|
||||
};
|
||||
static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
|
||||
{
|
||||
@ -101,10 +101,10 @@ static void bdz_ph_dump_graph(bdz_ph_graph3_t* graph3, cmph_uint32 nedges, cmph_
|
||||
printf(" nexts %d %d %d",graph3->edges[i].next_edges[0],
|
||||
graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
|
||||
};
|
||||
|
||||
|
||||
for(i=0;i<nvertices;i++){
|
||||
printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]);
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
@ -121,7 +121,7 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
|
||||
j=0;
|
||||
} else if(graph3->edges[edge1].vertices[1]==vert){
|
||||
j=1;
|
||||
} else
|
||||
} else
|
||||
j=2;
|
||||
edge1=graph3->edges[edge1].next_edges[j];
|
||||
};
|
||||
@ -130,16 +130,16 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge)
|
||||
bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
|
||||
exit(-1);
|
||||
};
|
||||
|
||||
|
||||
if(edge2!=NULL_EDGE){
|
||||
graph3->edges[edge2].next_edges[j] =
|
||||
graph3->edges[edge2].next_edges[j] =
|
||||
graph3->edges[edge1].next_edges[i];
|
||||
} else
|
||||
} else
|
||||
graph3->first_edge[vert]=
|
||||
graph3->edges[edge1].next_edges[i];
|
||||
graph3->vert_degree[vert]--;
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3)
|
||||
@ -176,7 +176,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_
|
||||
queue[queue_head++]=tmp_edge;
|
||||
SETBIT(marked_edge,tmp_edge);
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
if(graph3->vert_degree[v1]==1) {
|
||||
tmp_edge=graph3->first_edge[v1];
|
||||
@ -184,7 +184,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_
|
||||
queue[queue_head++]=tmp_edge;
|
||||
SETBIT(marked_edge,tmp_edge);
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
if(graph3->vert_degree[v2]==1){
|
||||
tmp_edge=graph3->first_edge[v2];
|
||||
@ -229,7 +229,7 @@ void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 1) break; //bdz_ph only uses one linear hash function
|
||||
bdz_ph->hashfunc = *hashptr;
|
||||
bdz_ph->hashfunc = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -251,16 +251,16 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
|
||||
if (c == 0) c = 1.23; // validating restrictions over parameter c.
|
||||
DEBUGP("c: %f\n", c);
|
||||
bdz_ph->m = mph->key_source->nkeys;
|
||||
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
|
||||
bdz_ph->m = mph->key_source->nkeys;
|
||||
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
|
||||
if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
|
||||
bdz_ph->n = 3*bdz_ph->r;
|
||||
|
||||
|
||||
|
||||
bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n);
|
||||
bdz_ph_alloc_queue(&edges,bdz_ph->m);
|
||||
DEBUGP("Created hypergraph\n");
|
||||
|
||||
|
||||
DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c);
|
||||
|
||||
// Mapping step
|
||||
@ -287,10 +287,10 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
|
||||
if (iterations == 0)
|
||||
{
|
||||
// free(bdz_ph->g);
|
||||
@ -308,7 +308,7 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
|
||||
bdz_ph_free_queue(&edges);
|
||||
bdz_ph_free_graph3(&graph3);
|
||||
|
||||
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Starting optimization step\n");
|
||||
@ -338,23 +338,23 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||
}
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
#ifdef CMPH_TIMING
|
||||
register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8;
|
||||
register cmph_uint32 keys_per_bucket = 1;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue)
|
||||
{
|
||||
cmph_uint32 e;
|
||||
int cycles = 0;
|
||||
cmph_uint32 hl[3];
|
||||
|
||||
|
||||
bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data;
|
||||
bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
@ -363,7 +363,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu
|
||||
cmph_uint32 h0, h1, h2;
|
||||
cmph_uint32 keylen;
|
||||
char *key = NULL;
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
hash_vector(bdz_ph->hl, key, keylen, hl);
|
||||
h0 = hl[0] % bdz_ph->r;
|
||||
h1 = hl[1] % bdz_ph->r + bdz_ph->r;
|
||||
@ -371,7 +371,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
bdz_ph_add_edge(graph3,h0,h1,h2);
|
||||
}
|
||||
cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3);
|
||||
cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3);
|
||||
return (cycles == 0);
|
||||
}
|
||||
|
||||
@ -383,7 +383,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
|
||||
cmph_uint32 v0,v1,v2;
|
||||
cmph_uint8 * marked_vertices =malloc((size_t)(bdz_ph->n >> 3) + 1);
|
||||
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0);
|
||||
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
|
||||
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
|
||||
memset(marked_vertices, 0, (size_t)(bdz_ph->n >> 3) + 1);
|
||||
//memset(bdz_ph->g, 0xff, sizeg);
|
||||
|
||||
@ -396,14 +396,14 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
|
||||
if(!GETBIT(marked_vertices, v0)){
|
||||
if(!GETBIT(marked_vertices,v1))
|
||||
{
|
||||
//SETVALUE(bdz_ph->g, v1, UNASSIGNED);
|
||||
//SETVALUE(bdz_ph->g, v1, UNASSIGNED);
|
||||
SETBIT(marked_vertices, v1);
|
||||
}
|
||||
if(!GETBIT(marked_vertices,v2))
|
||||
{
|
||||
//SETVALUE(bdz_ph->g, v2, UNASSIGNED);
|
||||
//SETVALUE(bdz_ph->g, v2, UNASSIGNED);
|
||||
SETBIT(marked_vertices, v2);
|
||||
}
|
||||
}
|
||||
SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3);
|
||||
SETBIT(marked_vertices, v0);
|
||||
} else if(!GETBIT(marked_vertices, v1)) {
|
||||
@ -417,7 +417,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz
|
||||
}else {
|
||||
SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3);
|
||||
SETBIT(marked_vertices, v2);
|
||||
}
|
||||
}
|
||||
DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2));
|
||||
};
|
||||
free(marked_vertices);
|
||||
@ -428,11 +428,11 @@ static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph)
|
||||
cmph_uint32 i;
|
||||
cmph_uint8 byte = 0;
|
||||
cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
|
||||
cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
|
||||
cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
|
||||
cmph_uint8 value;
|
||||
cmph_uint32 idx;
|
||||
for(i = 0; i < bdz_ph->n; i++)
|
||||
{
|
||||
for(i = 0; i < bdz_ph->n; i++)
|
||||
{
|
||||
idx = i/5;
|
||||
byte = new_g[idx];
|
||||
value = GETVALUE(bdz_ph->g, i);
|
||||
@ -462,7 +462,7 @@ int bdz_ph_dump(cmph_t *mphf, FILE *fd)
|
||||
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
sizeg = (cmph_uint32)ceil(data->n/5.0);
|
||||
sizeg = (cmph_uint32)ceil(data->n/5.0);
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
|
||||
|
||||
#ifdef DEBUG
|
||||
@ -491,19 +491,19 @@ void bdz_ph_load(FILE *f, cmph_t *mphf)
|
||||
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
|
||||
bdz_ph->hl = hash_state_load(buf, buflen);
|
||||
free(buf);
|
||||
|
||||
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f);
|
||||
sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0);
|
||||
bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8));
|
||||
nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
@ -520,12 +520,12 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
byte0 = bdz_ph->g[hl[0]/5];
|
||||
byte1 = bdz_ph->g[hl[1]/5];
|
||||
byte2 = bdz_ph->g[hl[2]/5];
|
||||
|
||||
|
||||
byte0 = lookup_table[hl[0]%5U][byte0];
|
||||
byte1 = lookup_table[hl[1]%5U][byte1];
|
||||
byte2 = lookup_table[hl[2]%5U][byte2];
|
||||
vertex = hl[(byte0 + byte1 + byte2)%3];
|
||||
|
||||
|
||||
return vertex;
|
||||
}
|
||||
|
||||
@ -533,7 +533,7 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
void bdz_ph_destroy(cmph_t *mphf)
|
||||
{
|
||||
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||
free(data->g);
|
||||
free(data->g);
|
||||
hash_state_destroy(data->hl);
|
||||
free(data);
|
||||
free(mphf);
|
||||
@ -571,17 +571,17 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
|
||||
{
|
||||
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||
cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0);
|
||||
return (cmph_uint32) (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*sizeg);
|
||||
}
|
||||
|
||||
/** cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key length in bytes
|
||||
@ -589,21 +589,21 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
|
||||
*/
|
||||
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
|
||||
|
||||
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
|
||||
|
||||
|
||||
register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type);
|
||||
|
||||
register cmph_uint32 r = *((cmph_uint32*) ptr);
|
||||
register cmph_uint8 * g = ptr + 4;
|
||||
|
||||
|
||||
cmph_uint32 hl[3];
|
||||
register cmph_uint8 byte0, byte1, byte2;
|
||||
register cmph_uint32 vertex;
|
||||
|
||||
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
|
||||
|
||||
|
||||
hl[0] = hl[0] % r;
|
||||
hl[1] = hl[1] % r + r;
|
||||
hl[2] = hl[2] % r + (r << 1);
|
||||
@ -611,11 +611,11 @@ cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
|
||||
byte0 = g[hl[0]/5];
|
||||
byte1 = g[hl[1]/5];
|
||||
byte2 = g[hl[2]/5];
|
||||
|
||||
|
||||
byte0 = lookup_table[hl[0]%5][byte0];
|
||||
byte1 = lookup_table[hl[1]%5][byte1];
|
||||
byte2 = lookup_table[hl[2]%5][byte2];
|
||||
vertex = hl[(byte0 + byte1 + byte2)%3];
|
||||
|
||||
|
||||
return vertex;
|
||||
}
|
||||
|
@ -128,4 +128,3 @@ int main(int argc, char** argv) {
|
||||
lsmap_destroy(g_created_mphf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
116
src/bmz.c
116
src/bmz.c
@ -24,7 +24,7 @@ bmz_config_data_t *bmz_config_new(void)
|
||||
{
|
||||
bmz_config_data_t *bmz = NULL;
|
||||
bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));
|
||||
assert(bmz);
|
||||
if (!bmz) return NULL;
|
||||
memset(bmz, 0, sizeof(bmz_config_data_t));
|
||||
bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||
bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||
@ -49,7 +49,7 @@ void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 2) break; //bmz only uses two hash functions
|
||||
bmz->hashfuncs[i] = *hashptr;
|
||||
bmz->hashfuncs[i] = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -68,8 +68,8 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
|
||||
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
|
||||
if (c == 0) c = 1.15; // validating restrictions over parameter c.
|
||||
DEBUGP("c: %f\n", c);
|
||||
bmz->m = mph->key_source->nkeys;
|
||||
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
|
||||
bmz->m = mph->key_source->nkeys;
|
||||
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
|
||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
|
||||
bmz->graph = graph_new(bmz->n, bmz->m);
|
||||
DEBUGP("Created graph\n");
|
||||
@ -81,7 +81,7 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
|
||||
{
|
||||
// Mapping step
|
||||
cmph_uint32 biggest_g_value = 0;
|
||||
cmph_uint32 biggest_edge_value = 1;
|
||||
cmph_uint32 biggest_edge_value = 1;
|
||||
iterations = 100;
|
||||
if (mph->verbosity)
|
||||
{
|
||||
@ -109,12 +109,12 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
}
|
||||
else break;
|
||||
}
|
||||
if (iterations == 0)
|
||||
{
|
||||
graph_destroy(bmz->graph);
|
||||
graph_destroy(bmz->graph);
|
||||
return NULL;
|
||||
}
|
||||
// Ordering step
|
||||
@ -155,17 +155,17 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
|
||||
}
|
||||
bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
iterations_map--;
|
||||
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
|
||||
}
|
||||
}
|
||||
free(used_edges);
|
||||
free(visited);
|
||||
} while(restart_mapping && iterations_map > 0);
|
||||
graph_destroy(bmz->graph);
|
||||
bmz->graph = NULL;
|
||||
if (iterations_map == 0)
|
||||
if (iterations_map == 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
@ -212,15 +212,15 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
|
||||
while(!vqueue_is_empty(q))
|
||||
{
|
||||
v = vqueue_remove(q);
|
||||
it = graph_neighbors_it(bmz->graph, v);
|
||||
it = graph_neighbors_it(bmz->graph, v);
|
||||
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
||||
{
|
||||
{
|
||||
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
|
||||
{
|
||||
collision = 1;
|
||||
while(collision) // lookahead to resolve collisions
|
||||
{
|
||||
next_g = *biggest_g_value + 1;
|
||||
next_g = *biggest_g_value + 1;
|
||||
it1 = graph_neighbors_it(bmz->graph, u);
|
||||
collision = 0;
|
||||
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
||||
@ -232,7 +232,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
|
||||
vqueue_destroy(q);
|
||||
return 1; // restart mapping step.
|
||||
}
|
||||
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
|
||||
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
|
||||
{
|
||||
collision = 1;
|
||||
break;
|
||||
@ -240,7 +240,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
|
||||
}
|
||||
}
|
||||
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
|
||||
}
|
||||
}
|
||||
// Marking used edges...
|
||||
it1 = graph_neighbors_it(bmz->graph, u);
|
||||
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
||||
@ -254,9 +254,9 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
|
||||
bmz->g[u] = next_g; // Labelling vertex u.
|
||||
SETBIT(visited,u);
|
||||
vqueue_insert(q, u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
vqueue_destroy(q);
|
||||
return 0;
|
||||
@ -282,22 +282,22 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
|
||||
while(!vqueue_is_empty(q))
|
||||
{
|
||||
v = vqueue_remove(q);
|
||||
it = graph_neighbors_it(bmz->graph, v);
|
||||
it = graph_neighbors_it(bmz->graph, v);
|
||||
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
||||
{
|
||||
{
|
||||
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
|
||||
{
|
||||
cmph_uint32 next_g_index = 0;
|
||||
collision = 1;
|
||||
while(collision) // lookahead to resolve collisions
|
||||
{
|
||||
if (next_g_index < nunused_g_values)
|
||||
if (next_g_index < nunused_g_values)
|
||||
{
|
||||
next_g = unused_g_values[next_g_index++];
|
||||
next_g = unused_g_values[next_g_index++];
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
next_g = *biggest_g_value + 1;
|
||||
next_g = *biggest_g_value + 1;
|
||||
next_g_index = UINT_MAX;
|
||||
}
|
||||
it1 = graph_neighbors_it(bmz->graph, u);
|
||||
@ -312,7 +312,7 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
|
||||
free(unused_g_values);
|
||||
return 1; // restart mapping step.
|
||||
}
|
||||
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
|
||||
if (GETBIT(used_edges, (next_g + bmz->g[lav])))
|
||||
{
|
||||
collision = 1;
|
||||
break;
|
||||
@ -324,13 +324,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
|
||||
if(nunused_g_values == unused_g_values_capacity)
|
||||
{
|
||||
unused_g_values = (cmph_uint32 *)realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32));
|
||||
unused_g_values_capacity += BUFSIZ;
|
||||
}
|
||||
unused_g_values[nunused_g_values++] = next_g;
|
||||
unused_g_values_capacity += BUFSIZ;
|
||||
}
|
||||
unused_g_values[nunused_g_values++] = next_g;
|
||||
|
||||
}
|
||||
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
|
||||
}
|
||||
}
|
||||
next_g_index--;
|
||||
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
|
||||
|
||||
@ -347,13 +347,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz,
|
||||
bmz->g[u] = next_g; // Labelling vertex u.
|
||||
SETBIT(visited, u);
|
||||
vqueue_insert(q, u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
vqueue_destroy(q);
|
||||
free(unused_g_values);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
|
||||
@ -381,8 +381,8 @@ static void bmz_traverse(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_u
|
||||
SETBIT(visited, neighbor);
|
||||
(*unused_edge_index)++;
|
||||
bmz_traverse(bmz, used_edges, neighbor, unused_edge_index, visited);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited)
|
||||
@ -394,7 +394,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
|
||||
{
|
||||
v1 = graph_vertex_id(bmz->graph, i, 0);
|
||||
v2 = graph_vertex_id(bmz->graph, i, 1);
|
||||
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
|
||||
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
|
||||
if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited);
|
||||
else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited);
|
||||
|
||||
@ -403,7 +403,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
|
||||
for(i = 0; i < bmz->n; i++)
|
||||
{
|
||||
if(!GETBIT(visited,i))
|
||||
{
|
||||
{
|
||||
bmz->g[i] = 0;
|
||||
SETBIT(visited, i);
|
||||
bmz_traverse(bmz, used_edges, i, &unused_edge_index, visited);
|
||||
@ -411,14 +411,14 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 *
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
static int bmz_gen_edges(cmph_config_t *mph)
|
||||
{
|
||||
cmph_uint32 e;
|
||||
bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data;
|
||||
cmph_uint8 multiple_edges = 0;
|
||||
DEBUGP("Generating edges for %u vertices\n", bmz->n);
|
||||
graph_clear_edges(bmz->graph);
|
||||
graph_clear_edges(bmz->graph);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||
{
|
||||
@ -426,12 +426,12 @@ static int bmz_gen_edges(cmph_config_t *mph)
|
||||
cmph_uint32 keylen;
|
||||
char *key = NULL;
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
|
||||
|
||||
h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
|
||||
h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
|
||||
if (h1 == h2) if (++h2 >= bmz->n) h2 = 0;
|
||||
DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
|
||||
if (h1 == h2)
|
||||
if (h1 == h2)
|
||||
{
|
||||
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
@ -472,7 +472,7 @@ int bmz_dump(cmph_t *mphf, FILE *fd)
|
||||
|
||||
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
|
||||
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->n), (size_t)1, fd);
|
||||
#ifdef DEBUG
|
||||
cmph_uint32 i;
|
||||
@ -510,8 +510,8 @@ void bmz_load(FILE *f, cmph_t *mphf)
|
||||
}
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
|
||||
bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n);
|
||||
nbytes = fread(bmz->g, bmz->n*sizeof(cmph_uint32), (size_t)1, f);
|
||||
@ -522,7 +522,7 @@ void bmz_load(FILE *f, cmph_t *mphf)
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
@ -537,7 +537,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
void bmz_destroy(cmph_t *mphf)
|
||||
{
|
||||
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||
free(data->g);
|
||||
free(data->g);
|
||||
hash_state_destroy(data->hashes[0]);
|
||||
hash_state_destroy(data->hashes[1]);
|
||||
free(data->hashes);
|
||||
@ -548,7 +548,7 @@ void bmz_destroy(cmph_t *mphf)
|
||||
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void bmz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
@ -579,26 +579,26 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
ptr += sizeof(data->n);
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 bmz_packed_size(cmph_t *mphf)
|
||||
{
|
||||
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
|
||||
}
|
||||
|
||||
/** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key legth in bytes
|
||||
@ -613,13 +613,13 @@ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||
h2_ptr += 4;
|
||||
|
||||
|
||||
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||
|
||||
register cmph_uint32 n = *g_ptr++;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
|
||||
register cmph_uint32 n = *g_ptr++;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||
return (g_ptr[h1] + g_ptr[h2]);
|
||||
return (g_ptr[h1] + g_ptr[h2]);
|
||||
}
|
||||
|
128
src/bmz8.c
128
src/bmz8.c
@ -23,7 +23,7 @@ bmz8_config_data_t *bmz8_config_new(void)
|
||||
{
|
||||
bmz8_config_data_t *bmz8;
|
||||
bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t));
|
||||
assert(bmz8);
|
||||
if (!bmz8) return NULL;
|
||||
memset(bmz8, 0, sizeof(bmz8_config_data_t));
|
||||
bmz8->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||
bmz8->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||
@ -48,7 +48,7 @@ void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 2) break; //bmz8 only uses two hash functions
|
||||
bmz8->hashfuncs[i] = *hashptr;
|
||||
bmz8->hashfuncs[i] = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -64,7 +64,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
|
||||
cmph_uint8 restart_mapping = 0;
|
||||
cmph_uint8 * visited = NULL;
|
||||
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
|
||||
|
||||
|
||||
if (mph->key_source->nkeys >= 256)
|
||||
{
|
||||
if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
|
||||
@ -72,8 +72,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
|
||||
}
|
||||
if (c == 0) c = 1.15; // validating restrictions over parameter c.
|
||||
DEBUGP("c: %f\n", c);
|
||||
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
|
||||
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
|
||||
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
|
||||
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
|
||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
|
||||
bmz8->graph = graph_new(bmz8->n, bmz8->m);
|
||||
DEBUGP("Created graph\n");
|
||||
@ -113,8 +113,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
if (iterations == 0)
|
||||
{
|
||||
@ -161,19 +161,19 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
|
||||
}
|
||||
bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
iterations_map--;
|
||||
if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
|
||||
}
|
||||
}
|
||||
|
||||
free(used_edges);
|
||||
free(visited);
|
||||
|
||||
}while(restart_mapping && iterations_map > 0);
|
||||
graph_destroy(bmz8->graph);
|
||||
graph_destroy(bmz8->graph);
|
||||
bmz8->graph = NULL;
|
||||
if (iterations_map == 0)
|
||||
if (iterations_map == 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
@ -213,15 +213,15 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
|
||||
while(!vqueue_is_empty(q))
|
||||
{
|
||||
v = vqueue_remove(q);
|
||||
it = graph_neighbors_it(bmz8->graph, v);
|
||||
it = graph_neighbors_it(bmz8->graph, v);
|
||||
while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
||||
{
|
||||
{
|
||||
if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
|
||||
{
|
||||
collision = 1;
|
||||
while(collision) // lookahead to resolve collisions
|
||||
{
|
||||
next_g = (cmph_uint8)(*biggest_g_value + 1);
|
||||
next_g = (cmph_uint8)(*biggest_g_value + 1);
|
||||
it1 = graph_neighbors_it(bmz8->graph, u);
|
||||
collision = 0;
|
||||
while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
||||
@ -233,7 +233,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
|
||||
vqueue_destroy(q);
|
||||
return 1; // restart mapping step.
|
||||
}
|
||||
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
|
||||
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
|
||||
{
|
||||
collision = 1;
|
||||
break;
|
||||
@ -241,7 +241,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
|
||||
}
|
||||
}
|
||||
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
|
||||
}
|
||||
}
|
||||
// Marking used edges...
|
||||
it1 = graph_neighbors_it(bmz8->graph, u);
|
||||
while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
||||
@ -250,16 +250,16 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
|
||||
{
|
||||
SETBIT(used_edges,(next_g + bmz8->g[lav]));
|
||||
|
||||
if(next_g + bmz8->g[lav] > *biggest_edge_value)
|
||||
if(next_g + bmz8->g[lav] > *biggest_edge_value)
|
||||
*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
|
||||
}
|
||||
}
|
||||
bmz8->g[u] = next_g; // Labelling vertex u.
|
||||
SETBIT(visited,u);
|
||||
vqueue_insert(q, u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
vqueue_destroy(q);
|
||||
return 0;
|
||||
@ -268,8 +268,8 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui
|
||||
static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
|
||||
{
|
||||
cmph_uint8 next_g;
|
||||
cmph_uint32 u;
|
||||
cmph_uint32 lav;
|
||||
cmph_uint32 u;
|
||||
cmph_uint32 lav;
|
||||
cmph_uint8 collision;
|
||||
cmph_uint8 * unused_g_values = NULL;
|
||||
cmph_uint8 unused_g_values_capacity = 0;
|
||||
@ -280,27 +280,27 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
|
||||
DEBUGP("Labelling critical vertices\n");
|
||||
bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1);
|
||||
SETBIT(visited, v);
|
||||
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2));
|
||||
next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2));
|
||||
vqueue_insert(q, v);
|
||||
while(!vqueue_is_empty(q))
|
||||
{
|
||||
v = vqueue_remove(q);
|
||||
it = graph_neighbors_it(bmz8->graph, v);
|
||||
it = graph_neighbors_it(bmz8->graph, v);
|
||||
while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
||||
{
|
||||
{
|
||||
if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u)))
|
||||
{
|
||||
cmph_uint8 next_g_index = 0;
|
||||
collision = 1;
|
||||
while(collision) // lookahead to resolve collisions
|
||||
{
|
||||
if (next_g_index < nunused_g_values)
|
||||
if (next_g_index < nunused_g_values)
|
||||
{
|
||||
next_g = unused_g_values[next_g_index++];
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
next_g = (cmph_uint8)(*biggest_g_value + 1);
|
||||
next_g = (cmph_uint8)(*biggest_g_value + 1);
|
||||
next_g_index = 255;//UINT_MAX;
|
||||
}
|
||||
it1 = graph_neighbors_it(bmz8->graph, u);
|
||||
@ -315,7 +315,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
|
||||
free(unused_g_values);
|
||||
return 1; // restart mapping step.
|
||||
}
|
||||
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
|
||||
if (GETBIT(used_edges, (next_g + bmz8->g[lav])))
|
||||
{
|
||||
collision = 1;
|
||||
break;
|
||||
@ -327,14 +327,14 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
|
||||
if(nunused_g_values == unused_g_values_capacity)
|
||||
{
|
||||
unused_g_values = (cmph_uint8*)realloc(unused_g_values, ((size_t)(unused_g_values_capacity + BUFSIZ))*sizeof(cmph_uint8));
|
||||
unused_g_values_capacity += (cmph_uint8)BUFSIZ;
|
||||
}
|
||||
unused_g_values[nunused_g_values++] = next_g;
|
||||
unused_g_values_capacity += (cmph_uint8)BUFSIZ;
|
||||
}
|
||||
unused_g_values[nunused_g_values++] = next_g;
|
||||
|
||||
}
|
||||
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
|
||||
}
|
||||
|
||||
|
||||
next_g_index--;
|
||||
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
|
||||
|
||||
@ -345,22 +345,22 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz
|
||||
if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav))
|
||||
{
|
||||
SETBIT(used_edges,(next_g + bmz8->g[lav]));
|
||||
if(next_g + bmz8->g[lav] > *biggest_edge_value)
|
||||
if(next_g + bmz8->g[lav] > *biggest_edge_value)
|
||||
*biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bmz8->g[u] = next_g; // Labelling vertex u.
|
||||
SETBIT(visited, u);
|
||||
vqueue_insert(q, u);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
vqueue_destroy(q);
|
||||
free(unused_g_values);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
|
||||
@ -388,8 +388,8 @@ static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmp
|
||||
SETBIT(visited, neighbor);
|
||||
(*unused_edge_index)++;
|
||||
bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited)
|
||||
@ -401,7 +401,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
|
||||
{
|
||||
v1 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 0);
|
||||
v2 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 1);
|
||||
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
|
||||
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
|
||||
if(GETBIT(visited,v1)) bmz8_traverse(bmz8, used_edges, v1, &unused_edge_index, visited);
|
||||
else bmz8_traverse(bmz8, used_edges, v2, &unused_edge_index, visited);
|
||||
|
||||
@ -410,7 +410,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
|
||||
for(i = 0; i < bmz8->n; i++)
|
||||
{
|
||||
if(!GETBIT(visited,i))
|
||||
{
|
||||
{
|
||||
bmz8->g[i] = 0;
|
||||
SETBIT(visited, i);
|
||||
bmz8_traverse(bmz8, used_edges, i, &unused_edge_index, visited);
|
||||
@ -418,14 +418,14 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
static int bmz8_gen_edges(cmph_config_t *mph)
|
||||
{
|
||||
cmph_uint8 e;
|
||||
bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;
|
||||
cmph_uint8 multiple_edges = 0;
|
||||
DEBUGP("Generating edges for %u vertices\n", bmz8->n);
|
||||
graph_clear_edges(bmz8->graph);
|
||||
graph_clear_edges(bmz8->graph);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||
{
|
||||
@ -433,12 +433,12 @@ static int bmz8_gen_edges(cmph_config_t *mph)
|
||||
cmph_uint32 keylen;
|
||||
char *key = NULL;
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
|
||||
|
||||
// if (key == NULL)fprintf(stderr, "key = %s -- read BMZ\n", key);
|
||||
h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n);
|
||||
h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n);
|
||||
if (h1 == h2) if (++h2 >= bmz8->n) h2 = 0;
|
||||
if (h1 == h2)
|
||||
if (h1 == h2)
|
||||
{
|
||||
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
@ -480,7 +480,7 @@ int bmz8_dump(cmph_t *mphf, FILE *fd)
|
||||
|
||||
nbytes = fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd);
|
||||
|
||||
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd);
|
||||
/* #ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
@ -518,8 +518,8 @@ void bmz8_load(FILE *f, cmph_t *mphf)
|
||||
}
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
|
||||
nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);
|
||||
nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f);
|
||||
nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f);
|
||||
|
||||
bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n);
|
||||
nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f);
|
||||
@ -530,7 +530,7 @@ void bmz8_load(FILE *f, cmph_t *mphf)
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
@ -556,7 +556,7 @@ void bmz8_destroy(cmph_t *mphf)
|
||||
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void bmz8_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
@ -585,26 +585,26 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
|
||||
*ptr++ = data->n;
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 bmz8_packed_size(cmph_t *mphf)
|
||||
{
|
||||
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
|
||||
}
|
||||
|
||||
/** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key legth in bytes
|
||||
@ -619,14 +619,14 @@ cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||
h2_ptr += 4;
|
||||
|
||||
|
||||
register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);
|
||||
|
||||
register cmph_uint8 n = *g_ptr++;
|
||||
|
||||
register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
|
||||
register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
|
||||
|
||||
register cmph_uint8 n = *g_ptr++;
|
||||
|
||||
register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n);
|
||||
register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n);
|
||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||
return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
|
||||
return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
|
||||
}
|
||||
|
190
src/brz.c
190
src/brz.c
@ -26,8 +26,9 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
|
||||
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
|
||||
brz_config_data_t *brz_config_new(void)
|
||||
{
|
||||
brz_config_data_t *brz = NULL;
|
||||
brz_config_data_t *brz = NULL;
|
||||
brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
|
||||
if (!brz) return NULL;
|
||||
brz->algo = CMPH_FCH;
|
||||
brz->b = 128;
|
||||
brz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||
@ -42,7 +43,7 @@ brz_config_data_t *brz_config_new(void)
|
||||
brz->memory_availability = 1024*1024;
|
||||
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)10, sizeof(cmph_uint8));
|
||||
brz->mphf_fd = NULL;
|
||||
strcpy((char *)(brz->tmp_dir), "/var/tmp/");
|
||||
strcpy((char *)(brz->tmp_dir), "/var/tmp/");
|
||||
assert(brz);
|
||||
return brz;
|
||||
}
|
||||
@ -63,7 +64,7 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 3) break; //brz only uses three hash functions
|
||||
brz->hashfuncs[i] = *hashptr;
|
||||
brz->hashfuncs[i] = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -84,14 +85,14 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
|
||||
if(tmp_dir[len-1] != '/')
|
||||
{
|
||||
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+2, sizeof(cmph_uint8));
|
||||
sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir);
|
||||
sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir);
|
||||
}
|
||||
else
|
||||
{
|
||||
brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+1, sizeof(cmph_uint8));
|
||||
sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir);
|
||||
sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -105,14 +106,14 @@ void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
|
||||
void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
|
||||
{
|
||||
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
||||
if(b <= 64 || b >= 175)
|
||||
if(b <= 64 || b >= 175)
|
||||
{
|
||||
b = 128;
|
||||
}
|
||||
brz->b = (cmph_uint8)b;
|
||||
}
|
||||
|
||||
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
||||
void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
||||
{
|
||||
if (algo == CMPH_BMZ8 || algo == CMPH_FCH) // supported algorithms
|
||||
{
|
||||
@ -147,13 +148,13 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
|
||||
brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
|
||||
DEBUGP("k: %u\n", brz->k);
|
||||
brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));
|
||||
|
||||
|
||||
// Clustering the keys by graph id.
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Partioning the set of keys.\n");
|
||||
fprintf(stderr, "Partioning the set of keys.\n");
|
||||
}
|
||||
|
||||
|
||||
while(1)
|
||||
{
|
||||
int ok;
|
||||
@ -172,17 +173,17 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
if (iterations == 0)
|
||||
if (iterations == 0)
|
||||
{
|
||||
DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
|
||||
free(brz->size);
|
||||
return NULL;
|
||||
}
|
||||
DEBUGP("Graphs generated\n");
|
||||
|
||||
|
||||
brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
|
||||
for (i = 1; i < brz->k; ++i)
|
||||
{
|
||||
@ -209,7 +210,7 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
|
||||
brzf->m = brz->m;
|
||||
brzf->algo = brz->algo;
|
||||
mphf->data = brzf;
|
||||
mphf->size = brz->m;
|
||||
mphf->size = brz->m;
|
||||
DEBUGP("Successfully generated minimal perfect hash\n");
|
||||
if (mph->verbosity)
|
||||
{
|
||||
@ -240,7 +241,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
cmph_uint32 cur_bucket = 0;
|
||||
cmph_uint8 nkeys_vd = 0;
|
||||
cmph_uint8 ** keys_vd = NULL;
|
||||
|
||||
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
DEBUGP("Generating graphs from %u keys\n", brz->m);
|
||||
// Partitioning
|
||||
@ -249,7 +250,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
|
||||
/* Buffers management */
|
||||
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
|
||||
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
|
||||
{
|
||||
if(mph->verbosity)
|
||||
{
|
||||
@ -265,8 +266,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
sum += value;
|
||||
value = buckets_size[i];
|
||||
buckets_size[i] = sum;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
memory_usage = 0;
|
||||
keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32));
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
@ -298,8 +299,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen);
|
||||
memory_usage += keylen + (cmph_uint32)sizeof(keylen);
|
||||
h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||
|
||||
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
|
||||
|
||||
if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])))
|
||||
{
|
||||
free(buffer);
|
||||
free(buckets_size);
|
||||
@ -310,8 +311,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
nkeys_in_buffer++;
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
}
|
||||
if (memory_usage != 0) // flush buffers
|
||||
{
|
||||
if (memory_usage != 0) // flush buffers
|
||||
{
|
||||
if(mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Flushing %u\n", nkeys_in_buffer);
|
||||
@ -370,12 +371,12 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd);
|
||||
nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs
|
||||
nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd);
|
||||
|
||||
|
||||
//tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
|
||||
buff_manager = buffer_manager_new(brz->memory_availability, nflushes);
|
||||
buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *));
|
||||
buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32));
|
||||
|
||||
|
||||
memory_usage = 0;
|
||||
for(i = 0; i < nflushes; i++)
|
||||
{
|
||||
@ -388,7 +389,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
|
||||
buffer_h0[i] = h0;
|
||||
buffer_merge[i] = (cmph_uint8 *)key;
|
||||
key = NULL; //transfer memory ownership
|
||||
key = NULL; //transfer memory ownership
|
||||
}
|
||||
e = 0;
|
||||
keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
|
||||
@ -429,7 +430,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
e++;
|
||||
buffer_h0[i] = UINT_MAX;
|
||||
}
|
||||
|
||||
|
||||
if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
|
||||
{
|
||||
cmph_io_adapter_t *source = NULL;
|
||||
@ -444,7 +445,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
//cmph_config_set_algo(config, CMPH_BMZ8);
|
||||
cmph_config_set_graphsize(config, brz->c);
|
||||
mphf_tmp = cmph_new(config);
|
||||
if (mphf_tmp == NULL)
|
||||
if (mphf_tmp == NULL)
|
||||
{
|
||||
if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k);
|
||||
error = 1;
|
||||
@ -453,9 +454,9 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
cmph_io_byte_vector_adapter_destroy(source);
|
||||
break;
|
||||
}
|
||||
if(mph->verbosity)
|
||||
if(mph->verbosity)
|
||||
{
|
||||
if (cur_bucket % 1000 == 0)
|
||||
if (cur_bucket % 1000 == 0)
|
||||
{
|
||||
fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k);
|
||||
}
|
||||
@ -465,7 +466,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
case CMPH_FCH:
|
||||
{
|
||||
fch_data_t * fchf = NULL;
|
||||
fchf = (fch_data_t *)mphf_tmp->data;
|
||||
fchf = (fch_data_t *)mphf_tmp->data;
|
||||
bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf);
|
||||
}
|
||||
break;
|
||||
@ -516,7 +517,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
|
||||
{
|
||||
cmph_uint32 i = 0;
|
||||
cmph_uint32 buflenh1 = 0;
|
||||
cmph_uint32 buflenh2 = 0;
|
||||
cmph_uint32 buflenh2 = 0;
|
||||
char * bufh1 = NULL;
|
||||
char * bufh2 = NULL;
|
||||
char * buf = NULL;
|
||||
@ -528,7 +529,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
|
||||
memcpy(buf, &buflenh1, sizeof(cmph_uint32));
|
||||
memcpy(buf+sizeof(cmph_uint32), bufh1, (size_t)buflenh1);
|
||||
memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32));
|
||||
memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2);
|
||||
memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2);
|
||||
for (i = 0; i < n; i++) memcpy(buf+2*sizeof(cmph_uint32)+buflenh1+buflenh2+i,(fchf->g + i), (size_t)1);
|
||||
free(bufh1);
|
||||
free(bufh2);
|
||||
@ -537,7 +538,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch
|
||||
static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen)
|
||||
{
|
||||
cmph_uint32 buflenh1 = 0;
|
||||
cmph_uint32 buflenh2 = 0;
|
||||
cmph_uint32 buflenh2 = 0;
|
||||
char * bufh1 = NULL;
|
||||
char * bufh2 = NULL;
|
||||
char * buf = NULL;
|
||||
@ -572,7 +573,7 @@ int brz_dump(cmph_t *mphf, FILE *fd)
|
||||
nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
|
||||
free(buf);
|
||||
// Dumping m and the vector offset.
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(data->offset, sizeof(cmph_uint32)*(data->k), (size_t)1, fd);
|
||||
return 1;
|
||||
}
|
||||
@ -591,7 +592,7 @@ void brz_load(FILE *f, cmph_t *mphf)
|
||||
nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo.
|
||||
nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f);
|
||||
brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k);
|
||||
nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
|
||||
nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f);
|
||||
brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
|
||||
brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k);
|
||||
brz->g = (cmph_uint8 **) calloc((size_t)brz->k, sizeof(cmph_uint8 *));
|
||||
@ -635,7 +636,7 @@ void brz_load(FILE *f, cmph_t *mphf)
|
||||
brz->h0 = hash_state_load(buf, buflen);
|
||||
free(buf);
|
||||
|
||||
//loading c, m, and the vector offset.
|
||||
//loading c, m, and the vector offset.
|
||||
nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k);
|
||||
nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f);
|
||||
@ -654,9 +655,9 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32
|
||||
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
|
||||
register cmph_uint8 mphf_bucket;
|
||||
|
||||
|
||||
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||
mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]);
|
||||
mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]);
|
||||
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
|
||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m);
|
||||
DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]);
|
||||
@ -722,61 +723,61 @@ void brz_destroy(cmph_t *mphf)
|
||||
/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void brz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
brz_data_t *data = (brz_data_t *)mphf->data;
|
||||
cmph_uint8 * ptr = packed_mphf;
|
||||
cmph_uint32 i,n;
|
||||
|
||||
|
||||
// packing internal algo type
|
||||
memcpy(ptr, &(data->algo), sizeof(data->algo));
|
||||
ptr += sizeof(data->algo);
|
||||
|
||||
// packing h0 type
|
||||
CMPH_HASH h0_type = hash_get_type(data->h0);
|
||||
CMPH_HASH h0_type = hash_get_type(data->h0);
|
||||
memcpy(ptr, &h0_type, sizeof(h0_type));
|
||||
ptr += sizeof(h0_type);
|
||||
|
||||
// packing h0
|
||||
hash_state_pack(data->h0, ptr);
|
||||
ptr += hash_state_packed_size(h0_type);
|
||||
|
||||
|
||||
// packing k
|
||||
memcpy(ptr, &(data->k), sizeof(data->k));
|
||||
ptr += sizeof(data->k);
|
||||
|
||||
// packing c
|
||||
*((cmph_uint64 *)ptr) = (cmph_uint64)data->c;
|
||||
*((cmph_uint64 *)ptr) = (cmph_uint64)data->c;
|
||||
ptr += sizeof(data->c);
|
||||
|
||||
// packing h1 type
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
||||
memcpy(ptr, &h1_type, sizeof(h1_type));
|
||||
ptr += sizeof(h1_type);
|
||||
|
||||
// packing h2 type
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
|
||||
memcpy(ptr, &h2_type, sizeof(h2_type));
|
||||
ptr += sizeof(h2_type);
|
||||
|
||||
// packing size
|
||||
memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
|
||||
memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
|
||||
ptr += data->k;
|
||||
|
||||
|
||||
// packing offset
|
||||
memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
|
||||
memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
|
||||
ptr += sizeof(cmph_uint32)*data->k;
|
||||
|
||||
|
||||
#if defined (__ia64) || defined (__x86_64__)
|
||||
cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr;
|
||||
#else
|
||||
cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr;
|
||||
#endif
|
||||
|
||||
|
||||
cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k);
|
||||
|
||||
|
||||
for(i = 0; i < data->k; i++)
|
||||
{
|
||||
#if defined (__ia64) || defined (__x86_64__)
|
||||
@ -787,7 +788,7 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
// packing h1[i]
|
||||
hash_state_pack(data->h1[i], g_i);
|
||||
g_i += hash_state_packed_size(h1_type);
|
||||
|
||||
|
||||
// packing h2[i]
|
||||
hash_state_pack(data->h2[i], g_i);
|
||||
g_i += hash_state_packed_size(h2_type);
|
||||
@ -803,9 +804,9 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
|
||||
memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
|
||||
g_i += n;
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -814,16 +815,16 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 brz_packed_size(cmph_t *mphf)
|
||||
{
|
||||
cmph_uint32 i;
|
||||
cmph_uint32 size = 0;
|
||||
brz_data_t *data = (brz_data_t *)mphf->data;
|
||||
CMPH_HASH h0_type = hash_get_type(data->h0);
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
||||
CMPH_HASH h0_type = hash_get_type(data->h0);
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
|
||||
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
|
||||
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
|
||||
sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
|
||||
// pointers to g_is
|
||||
#if defined (__ia64) || defined (__x86_64__)
|
||||
@ -831,10 +832,10 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
|
||||
#else
|
||||
size += (cmph_uint32) sizeof(cmph_uint32)*data->k;
|
||||
#endif
|
||||
|
||||
|
||||
size += hash_state_packed_size(h1_type) * data->k;
|
||||
size += hash_state_packed_size(h2_type) * data->k;
|
||||
|
||||
|
||||
cmph_uint32 n = 0;
|
||||
for(i = 0; i < data->k; i++)
|
||||
{
|
||||
@ -848,7 +849,7 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
|
||||
break;
|
||||
default: assert(0);
|
||||
}
|
||||
size += n;
|
||||
size += n;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
@ -859,28 +860,28 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
|
||||
{
|
||||
register CMPH_HASH h0_type = *packed_mphf++;
|
||||
register cmph_uint32 *h0_ptr = packed_mphf;
|
||||
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
|
||||
|
||||
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
|
||||
|
||||
register cmph_uint32 k = *packed_mphf++;
|
||||
|
||||
register double c = (double)(*((cmph_uint64*)packed_mphf));
|
||||
packed_mphf += 2;
|
||||
|
||||
register CMPH_HASH h1_type = *packed_mphf++;
|
||||
|
||||
register CMPH_HASH h2_type = *packed_mphf++;
|
||||
register CMPH_HASH h1_type = *packed_mphf++;
|
||||
|
||||
register CMPH_HASH h2_type = *packed_mphf++;
|
||||
|
||||
register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
|
||||
packed_mphf = (cmph_uint32 *)(size + k);
|
||||
|
||||
packed_mphf = (cmph_uint32 *)(size + k);
|
||||
|
||||
register cmph_uint32 * offset = packed_mphf;
|
||||
packed_mphf += k;
|
||||
|
||||
register cmph_uint32 h0;
|
||||
|
||||
|
||||
hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
|
||||
h0 = fingerprint[2] % k;
|
||||
|
||||
|
||||
register cmph_uint32 m = size[h0];
|
||||
register cmph_uint32 n = (cmph_uint32)ceil(c * m);
|
||||
|
||||
@ -889,69 +890,69 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *
|
||||
#else
|
||||
register cmph_uint32 * g_is_ptr = packed_mphf;
|
||||
#endif
|
||||
|
||||
|
||||
register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
|
||||
|
||||
|
||||
register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
|
||||
register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
|
||||
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
|
||||
register cmph_uint8 mphf_bucket;
|
||||
|
||||
|
||||
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||
mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
|
||||
mphf_bucket = (cmph_uint8)(g[h1] + g[h2]);
|
||||
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
|
||||
DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
|
||||
return (mphf_bucket + offset[h0]);
|
||||
return (mphf_bucket + offset[h0]);
|
||||
}
|
||||
|
||||
static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||
{
|
||||
register CMPH_HASH h0_type = *packed_mphf++;
|
||||
|
||||
|
||||
register cmph_uint32 *h0_ptr = packed_mphf;
|
||||
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
|
||||
|
||||
packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));
|
||||
|
||||
register cmph_uint32 k = *packed_mphf++;
|
||||
|
||||
register double c = (double)(*((cmph_uint64*)packed_mphf));
|
||||
packed_mphf += 2;
|
||||
|
||||
register CMPH_HASH h1_type = *packed_mphf++;
|
||||
register CMPH_HASH h1_type = *packed_mphf++;
|
||||
|
||||
register CMPH_HASH h2_type = *packed_mphf++;
|
||||
register CMPH_HASH h2_type = *packed_mphf++;
|
||||
|
||||
register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
|
||||
packed_mphf = (cmph_uint32 *)(size + k);
|
||||
|
||||
packed_mphf = (cmph_uint32 *)(size + k);
|
||||
|
||||
register cmph_uint32 * offset = packed_mphf;
|
||||
packed_mphf += k;
|
||||
|
||||
|
||||
register cmph_uint32 h0;
|
||||
|
||||
|
||||
hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
|
||||
h0 = fingerprint[2] % k;
|
||||
|
||||
|
||||
register cmph_uint32 m = size[h0];
|
||||
register cmph_uint32 b = fch_calc_b(c, m);
|
||||
register double p1 = fch_calc_p1(m);
|
||||
register double p2 = fch_calc_p2(b);
|
||||
|
||||
|
||||
#if defined (__ia64) || defined (__x86_64__)
|
||||
register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf;
|
||||
#else
|
||||
register cmph_uint32 * g_is_ptr = packed_mphf;
|
||||
#endif
|
||||
|
||||
|
||||
register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];
|
||||
|
||||
|
||||
register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
|
||||
register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);
|
||||
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
|
||||
|
||||
@ -962,7 +963,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k
|
||||
}
|
||||
|
||||
/** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key legth in bytes
|
||||
@ -970,7 +971,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k
|
||||
*/
|
||||
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
|
||||
register CMPH_ALGO algo = *ptr++;
|
||||
cmph_uint32 fingerprint[3];
|
||||
switch(algo)
|
||||
@ -982,4 +983,3 @@ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@ struct __buffer_entry_t
|
||||
buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
|
||||
{
|
||||
buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t));
|
||||
assert(buff_entry);
|
||||
if (!buff_entry) return NULL;
|
||||
buff_entry->fd = NULL;
|
||||
buff_entry->buff = NULL;
|
||||
buff_entry->capacity = capacity;
|
||||
@ -62,7 +62,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
|
||||
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
|
||||
{
|
||||
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
|
||||
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
|
||||
@ -71,7 +71,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
|
||||
}
|
||||
memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
|
||||
buffer_entry->pos += lacked_bytes;
|
||||
|
||||
|
||||
lacked_bytes = *keylen;
|
||||
copied_bytes = 0;
|
||||
buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
|
||||
@ -83,7 +83,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 *
|
||||
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
|
||||
}
|
||||
buffer_entry_load(buffer_entry);
|
||||
}
|
||||
}
|
||||
memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
|
||||
buffer_entry->pos += lacked_bytes;
|
||||
return buf;
|
||||
@ -97,7 +97,7 @@ void buffer_entry_destroy(buffer_entry_t * buffer_entry)
|
||||
buffer_entry->buff = NULL;
|
||||
buffer_entry->capacity = 0;
|
||||
buffer_entry->nbytes = 0;
|
||||
buffer_entry->pos = 0;
|
||||
buffer_entry->pos = 0;
|
||||
buffer_entry->eof = 0;
|
||||
free(buffer_entry);
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
|
||||
{
|
||||
cmph_uint32 memory_avail_entry, i;
|
||||
buffer_manage_t *buff_manage = (buffer_manage_t *)malloc(sizeof(buffer_manage_t));
|
||||
assert(buff_manage);
|
||||
if (!buff_manage) return NULL;
|
||||
buff_manage->memory_avail = memory_avail;
|
||||
buff_manage->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
|
||||
buff_manage->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
|
||||
@ -26,7 +26,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
|
||||
for(i = 0; i < buff_manage->nentries; i++)
|
||||
{
|
||||
buff_manage->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
|
||||
}
|
||||
}
|
||||
return buff_manage;
|
||||
}
|
||||
|
||||
@ -54,7 +54,7 @@ cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32
|
||||
}
|
||||
|
||||
void buffer_manage_destroy(buffer_manage_t * buffer_manage)
|
||||
{
|
||||
{
|
||||
cmph_uint32 i;
|
||||
for(i = 0; i < buffer_manage->nentries; i++)
|
||||
{
|
||||
|
@ -16,7 +16,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
|
||||
{
|
||||
cmph_uint32 memory_avail_entry, i;
|
||||
buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t));
|
||||
assert(buff_manager);
|
||||
if (!buff_manager) return NULL;
|
||||
buff_manager->memory_avail = memory_avail;
|
||||
buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
|
||||
buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
|
||||
@ -26,7 +26,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
|
||||
for(i = 0; i < buff_manager->nentries; i++)
|
||||
{
|
||||
buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
|
||||
}
|
||||
}
|
||||
return buff_manager;
|
||||
}
|
||||
|
||||
@ -52,7 +52,7 @@ cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uin
|
||||
}
|
||||
|
||||
void buffer_manager_destroy(buffer_manager_t * buffer_manager)
|
||||
{
|
||||
{
|
||||
cmph_uint32 i;
|
||||
for(i = 0; i < buffer_manager->nentries; i++)
|
||||
{
|
||||
|
60
src/chd.c
60
src/chd.c
@ -18,7 +18,7 @@ chd_config_data_t *chd_config_new(cmph_config_t *mph)
|
||||
cmph_io_adapter_t *key_source = mph->key_source;
|
||||
chd_config_data_t *chd;
|
||||
chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t));
|
||||
assert(chd);
|
||||
if (!chd) return NULL;
|
||||
memset(chd, 0, sizeof(chd_config_data_t));
|
||||
|
||||
chd->chd_ph = cmph_config_new(key_source);
|
||||
@ -69,12 +69,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
chd_config_data_t *chd = (chd_config_data_t *)mph->data;
|
||||
chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
|
||||
compressed_rank_t cr;
|
||||
|
||||
|
||||
register cmph_t * chd_phf = NULL;
|
||||
register cmph_uint32 packed_chd_phf_size = 0;
|
||||
register cmph_uint32 packed_chd_phf_size = 0;
|
||||
cmph_uint8 * packed_chd_phf = NULL;
|
||||
|
||||
register cmph_uint32 packed_cr_size = 0;
|
||||
|
||||
register cmph_uint32 packed_cr_size = 0;
|
||||
cmph_uint8 * packed_cr = NULL;
|
||||
|
||||
register cmph_uint32 i, idx, nkeys, nvals, nbins;
|
||||
@ -86,24 +86,24 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
|
||||
#endif
|
||||
|
||||
cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
|
||||
cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
|
||||
cmph_config_set_graphsize(chd->chd_ph, c);
|
||||
|
||||
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
|
||||
}
|
||||
|
||||
|
||||
chd_phf = cmph_new(chd->chd_ph);
|
||||
|
||||
if(chd_phf == NULL)
|
||||
|
||||
if(chd_phf == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
packed_chd_phf_size = cmph_packed_size(chd_phf);
|
||||
|
||||
packed_chd_phf_size = cmph_packed_size(chd_phf);
|
||||
DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
|
||||
|
||||
|
||||
/* Make sure that we have enough space to pack the mphf. */
|
||||
packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
|
||||
|
||||
@ -111,8 +111,8 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
cmph_pack(chd_phf, packed_chd_phf);
|
||||
|
||||
cmph_destroy(chd_phf);
|
||||
|
||||
|
||||
|
||||
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
|
||||
@ -121,11 +121,11 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
compressed_rank_init(&cr);
|
||||
nbins = chd_ph->n;
|
||||
nkeys = chd_ph->m;
|
||||
nvals = nbins - nkeys;
|
||||
|
||||
nvals = nbins - nkeys;
|
||||
|
||||
vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
|
||||
occup_table = (cmph_uint32 *)chd_ph->occup_table;
|
||||
|
||||
|
||||
for(i = 0, idx = 0; i < nbins; i++)
|
||||
{
|
||||
if(!GETBIT32(occup_table, i))
|
||||
@ -133,10 +133,10 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
vals_table[idx++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
compressed_rank_generate(&cr, vals_table, nvals);
|
||||
free(vals_table);
|
||||
|
||||
|
||||
packed_cr_size = compressed_rank_packed_size(&cr);
|
||||
packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
|
||||
compressed_rank_pack(&cr, packed_cr);
|
||||
@ -145,16 +145,16 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||
mphf->algo = mph->algo;
|
||||
chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
|
||||
|
||||
|
||||
chdf->packed_cr = packed_cr;
|
||||
packed_cr = NULL; //transfer memory ownership
|
||||
|
||||
chdf->packed_chd_phf = packed_chd_phf;
|
||||
packed_chd_phf = NULL; //transfer memory ownership
|
||||
|
||||
|
||||
chdf->packed_chd_phf_size = packed_chd_phf_size;
|
||||
chdf->packed_cr_size = packed_cr_size;
|
||||
|
||||
|
||||
mphf->data = chdf;
|
||||
mphf->size = nkeys;
|
||||
|
||||
@ -163,12 +163,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c)
|
||||
{
|
||||
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||
}
|
||||
#ifdef CMPH_TIMING
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time);
|
||||
register cmph_uint32 space_usage = chd_packed_size(mphf)*8;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
@ -196,7 +196,7 @@ int chd_dump(cmph_t *mphf, FILE *fd)
|
||||
{
|
||||
register size_t nbytes;
|
||||
chd_data_t *data = (chd_data_t *)mphf->data;
|
||||
|
||||
|
||||
__cmph_dump(mphf, fd);
|
||||
// Dumping CHD_PH perfect hash function
|
||||
|
||||
@ -207,7 +207,7 @@ int chd_dump(cmph_t *mphf, FILE *fd)
|
||||
DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", 1);
|
||||
nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd);
|
||||
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -242,10 +242,10 @@ void chd_pack(cmph_t *mphf, void *packed_mphf)
|
||||
// packing packed_cr_size and packed_cr
|
||||
*ptr = data->packed_cr_size;
|
||||
ptr8 = (cmph_uint8 *) (ptr + 1);
|
||||
|
||||
|
||||
memcpy(ptr8, data->packed_cr, data->packed_cr_size);
|
||||
ptr8 += data->packed_cr_size;
|
||||
|
||||
|
||||
ptr = (cmph_uint32 *) ptr8;
|
||||
*ptr = data->packed_chd_phf_size;
|
||||
|
||||
@ -268,5 +268,3 @@ cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32);
|
||||
return _chd_search(packed_chd_phf, ptr, key, keylen);
|
||||
}
|
||||
|
||||
|
||||
|
157
src/chd_ph.c
157
src/chd_ph.c
@ -29,7 +29,7 @@ struct _chd_ph_item_t
|
||||
};
|
||||
typedef struct _chd_ph_item_t chd_ph_item_t;
|
||||
|
||||
// struct to represent the items at mapping phase only.
|
||||
// struct to represent the items at mapping phase only.
|
||||
struct _chd_ph_map_item_t
|
||||
{
|
||||
cmph_uint32 f;
|
||||
@ -85,7 +85,7 @@ static cmph_uint8 chd_ph_bucket_insert(chd_ph_bucket_t * buckets,chd_ph_map_item
|
||||
register chd_ph_map_item_t * tmp_map_item = map_items + item_idx;
|
||||
register chd_ph_bucket_t * bucket = buckets + tmp_map_item->bucket_num;
|
||||
tmp_item = items + bucket->items_list;
|
||||
|
||||
|
||||
for(i = 0; i < bucket->size; i++)
|
||||
{
|
||||
if(tmp_item->f == tmp_map_item->f && tmp_item->h == tmp_map_item->h)
|
||||
@ -105,7 +105,7 @@ void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets)
|
||||
free(buckets);
|
||||
}
|
||||
|
||||
static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
|
||||
static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items,
|
||||
cmph_uint32 *max_bucket_size);
|
||||
|
||||
static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items,
|
||||
@ -131,7 +131,7 @@ static inline double chd_ph_get_entropy(cmph_uint32 * disp_table, cmph_uint32 n,
|
||||
{
|
||||
probe_counts[disp_table[i]]++;
|
||||
};
|
||||
|
||||
|
||||
for(i = 0; i < max_probes; i++)
|
||||
{
|
||||
if(probe_counts[i] > 0)
|
||||
@ -145,9 +145,9 @@ chd_ph_config_data_t *chd_ph_config_new(void)
|
||||
{
|
||||
chd_ph_config_data_t *chd_ph;
|
||||
chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t));
|
||||
assert(chd_ph);
|
||||
if (!chd_ph) return NULL;
|
||||
memset(chd_ph, 0, sizeof(chd_ph_config_data_t));
|
||||
|
||||
|
||||
chd_ph->hashfunc = CMPH_HASH_JENKINS;
|
||||
chd_ph->cs = NULL;
|
||||
chd_ph->nbuckets = 0;
|
||||
@ -159,7 +159,7 @@ chd_ph_config_data_t *chd_ph_config_new(void)
|
||||
chd_ph->keys_per_bin = 1;
|
||||
chd_ph->keys_per_bucket = 4;
|
||||
chd_ph->occup_table = 0;
|
||||
|
||||
|
||||
return chd_ph;
|
||||
}
|
||||
|
||||
@ -184,7 +184,7 @@ void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 1) break; //chd_ph only uses one linear hash function
|
||||
chd_ph->hashfunc = *hashptr;
|
||||
chd_ph->hashfunc = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -228,24 +228,24 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_
|
||||
{
|
||||
mapping_iterations--;
|
||||
if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
|
||||
chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);
|
||||
chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);
|
||||
|
||||
chd_ph_bucket_clean(buckets, chd_ph->nbuckets);
|
||||
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
|
||||
for(i = 0; i < chd_ph->m; i++)
|
||||
{
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
hash_vector(chd_ph->hl, key, keylen, hl);
|
||||
|
||||
|
||||
map_item = (map_items + i);
|
||||
|
||||
g = hl[0] % chd_ph->nbuckets;
|
||||
map_item->f = hl[1] % chd_ph->n;
|
||||
map_item->h = hl[2] % (chd_ph->n - 1) + 1;
|
||||
map_item->bucket_num=g;
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
// if(buckets[g].size == (chd_ph->keys_per_bucket << 2))
|
||||
// {
|
||||
// DEBUGP("BUCKET = %u -- SIZE = %u -- MAXIMUM SIZE = %u\n", g, buckets[g].size, (chd_ph->keys_per_bucket << 2));
|
||||
@ -275,7 +275,7 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_
|
||||
free(map_items);
|
||||
return 1; // SUCCESS
|
||||
}
|
||||
|
||||
|
||||
if(mapping_iterations == 0)
|
||||
{
|
||||
goto error;
|
||||
@ -292,7 +292,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
|
||||
cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size)
|
||||
{
|
||||
chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(max_bucket_size + 1, sizeof(chd_ph_sorted_list_t));
|
||||
|
||||
|
||||
chd_ph_bucket_t * input_buckets = (*_buckets);
|
||||
chd_ph_bucket_t * output_buckets;
|
||||
chd_ph_item_t * input_items = (*_items);
|
||||
@ -319,7 +319,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
|
||||
// Store the buckets in a new array which is sorted by bucket sizes
|
||||
output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero
|
||||
// non_empty_buckets = nbuckets;
|
||||
|
||||
|
||||
for(i = 0; i < nbuckets; i++)
|
||||
{
|
||||
bucket_size = input_buckets[i].size;
|
||||
@ -338,8 +338,8 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_
|
||||
// Return the buckets sorted in new order and free the old buckets sorted in old order
|
||||
free(input_buckets);
|
||||
(*_buckets) = output_buckets;
|
||||
|
||||
|
||||
|
||||
|
||||
// Store the items according to the new order of buckets.
|
||||
output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t));
|
||||
position = 0;
|
||||
@ -426,26 +426,26 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph
|
||||
}
|
||||
position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n);
|
||||
UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position);
|
||||
|
||||
|
||||
// ([position/32]^=(1<<(position%32));
|
||||
item++;
|
||||
i--;
|
||||
};
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
};
|
||||
|
||||
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
|
||||
static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes,
|
||||
cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size)
|
||||
|
||||
|
||||
{
|
||||
register cmph_uint32 probe0_num, probe1_num, probe_num;
|
||||
probe0_num = 0;
|
||||
probe1_num = 0;
|
||||
probe_num = 0;
|
||||
|
||||
|
||||
while(1)
|
||||
{
|
||||
if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, bucket_num,size))
|
||||
@ -469,7 +469,7 @@ static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucke
|
||||
};
|
||||
|
||||
static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items,
|
||||
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
|
||||
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
|
||||
cmph_uint32 * disp_table)
|
||||
{
|
||||
register cmph_uint32 i = 0;
|
||||
@ -490,8 +490,8 @@ static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
return 1;
|
||||
};
|
||||
|
||||
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
|
||||
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
|
||||
static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items,
|
||||
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
|
||||
cmph_uint32 * disp_table)
|
||||
{
|
||||
register cmph_uint32 i,j, non_placed_bucket;
|
||||
@ -516,10 +516,10 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
{
|
||||
// if bucket is successfully placed remove it from list
|
||||
if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i))
|
||||
{
|
||||
{
|
||||
disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n;
|
||||
// DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// DEBUGP("BUCKET %u NOT PLACED\n", curr_bucket);
|
||||
@ -529,7 +529,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
#endif
|
||||
buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list;
|
||||
buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id;
|
||||
#ifdef DEBUG
|
||||
#ifdef DEBUG
|
||||
buckets[curr_bucket].items_list=items_list;
|
||||
buckets[curr_bucket].bucket_id=bucket_id;
|
||||
#endif
|
||||
@ -557,7 +557,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc
|
||||
};
|
||||
|
||||
cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items ,
|
||||
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
|
||||
cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes,
|
||||
cmph_uint32 * disp_table)
|
||||
{
|
||||
if(chd_ph->use_h)
|
||||
@ -582,7 +582,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
|
||||
memset(chd_ph->occup_table, 0, chd_ph->n);
|
||||
else
|
||||
memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
|
||||
|
||||
|
||||
for(bucket_size = 1; bucket_size <= max_bucket_size; bucket_size++)
|
||||
for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size +
|
||||
sorted_lists[bucket_size].buckets_list; i++)
|
||||
@ -602,7 +602,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph,
|
||||
return 0;
|
||||
}
|
||||
(chd_ph->occup_table[position])++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(GETBIT32(((cmph_uint32*)chd_ph->occup_table), position))
|
||||
@ -624,7 +624,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
cmph_t *mphf = NULL;
|
||||
chd_ph_data_t *chd_phf = NULL;
|
||||
chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
|
||||
|
||||
|
||||
register double load_factor = c;
|
||||
register cmph_uint8 searching_success = 0;
|
||||
register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
|
||||
@ -645,24 +645,24 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
|
||||
chd_ph->m = mph->key_source->nkeys;
|
||||
DEBUGP("m = %u\n", chd_ph->m);
|
||||
|
||||
|
||||
chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
|
||||
DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);
|
||||
|
||||
|
||||
if(load_factor < 0.5 )
|
||||
{
|
||||
load_factor = 0.5;
|
||||
}
|
||||
|
||||
|
||||
if(load_factor >= 0.99)
|
||||
{
|
||||
load_factor = 0.99;
|
||||
}
|
||||
|
||||
|
||||
DEBUGP("load_factor = %.3f\n", load_factor);
|
||||
|
||||
|
||||
chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;
|
||||
|
||||
|
||||
//Round the number of bins to the prime immediately above
|
||||
if(chd_ph->n % 2 == 0) chd_ph->n++;
|
||||
for(;;)
|
||||
@ -670,35 +670,35 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
if(check_primality(chd_ph->n) == 1)
|
||||
break;
|
||||
chd_ph->n += 2; // just odd numbers can be primes for n > 2
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
DEBUGP("n = %u \n", chd_ph->n);
|
||||
if(chd_ph->keys_per_bin == 1)
|
||||
{
|
||||
space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
|
||||
}
|
||||
|
||||
|
||||
if(mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
|
||||
}
|
||||
|
||||
// We allocate the working tables
|
||||
buckets = chd_ph_bucket_new(chd_ph->nbuckets);
|
||||
buckets = chd_ph_bucket_new(chd_ph->nbuckets);
|
||||
items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
|
||||
|
||||
max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
|
||||
|
||||
|
||||
if(chd_ph->keys_per_bin == 1)
|
||||
chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
|
||||
else
|
||||
chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
|
||||
|
||||
|
||||
disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
|
||||
//
|
||||
//
|
||||
// init_genrand(time(0));
|
||||
|
||||
|
||||
while(1)
|
||||
{
|
||||
iterations --;
|
||||
@ -706,12 +706,12 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
{
|
||||
fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
|
||||
}
|
||||
|
||||
|
||||
if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
|
||||
{
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Failure in mapping step\n");
|
||||
fprintf(stderr, "Failure in mapping step\n");
|
||||
}
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
@ -727,15 +727,15 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
}
|
||||
|
||||
sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);
|
||||
|
||||
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Starting searching step\n");
|
||||
}
|
||||
|
||||
|
||||
searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
|
||||
if(searching_success) break;
|
||||
|
||||
|
||||
// reset occup_table
|
||||
if(chd_ph->keys_per_bin > 1)
|
||||
memset(chd_ph->occup_table, 0, chd_ph->n);
|
||||
@ -757,19 +757,19 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
{
|
||||
if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
|
||||
{
|
||||
|
||||
|
||||
DEBUGP("Error for bin packing generation");
|
||||
failure = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Starting compressing step\n");
|
||||
}
|
||||
|
||||
|
||||
if(chd_ph->cs)
|
||||
{
|
||||
free(chd_ph->cs);
|
||||
@ -777,7 +777,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
|
||||
compressed_seq_init(chd_ph->cs);
|
||||
compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
|
||||
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
ELAPSED_TIME_IN_SECONDS(&construction_time);
|
||||
register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
|
||||
@ -785,11 +785,11 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
||||
#endif
|
||||
|
||||
cleanup:
|
||||
chd_ph_bucket_destroy(buckets);
|
||||
chd_ph_bucket_destroy(buckets);
|
||||
free(items);
|
||||
free(sorted_lists);
|
||||
free(disp_table);
|
||||
if(failure)
|
||||
if(failure)
|
||||
{
|
||||
if(chd_ph->hl)
|
||||
{
|
||||
@ -802,14 +802,14 @@ cleanup:
|
||||
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||
mphf->algo = mph->algo;
|
||||
chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
|
||||
|
||||
|
||||
chd_phf->cs = chd_ph->cs;
|
||||
chd_ph->cs = NULL; //transfer memory ownership
|
||||
chd_phf->hl = chd_ph->hl;
|
||||
chd_ph->hl = NULL; //transfer memory ownership
|
||||
chd_phf->n = chd_ph->n;
|
||||
chd_phf->nbuckets = chd_ph->nbuckets;
|
||||
|
||||
|
||||
mphf->data = chd_phf;
|
||||
mphf->size = chd_ph->n;
|
||||
|
||||
@ -818,12 +818,12 @@ cleanup:
|
||||
{
|
||||
fprintf(stderr, "Successfully generated minimal perfect hash function\n");
|
||||
}
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
|
||||
#ifdef CMPH_TIMING
|
||||
register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
|
||||
construction_time = construction_time - construction_time_begin;
|
||||
fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return mphf;
|
||||
}
|
||||
@ -846,19 +846,19 @@ void chd_ph_load(FILE *fd, cmph_t *mphf)
|
||||
nbytes = fread(buf, (size_t)buflen, (size_t)1, fd);
|
||||
chd_ph->hl = hash_state_load(buf, buflen);
|
||||
free(buf);
|
||||
|
||||
|
||||
nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
|
||||
DEBUGP("Compressed sequence structure has %u bytes\n", buflen);
|
||||
buf = (char *)malloc((size_t)buflen);
|
||||
nbytes = fread(buf, (size_t)buflen, (size_t)1, fd);
|
||||
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
|
||||
chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
|
||||
compressed_seq_load(chd_ph->cs, buf, buflen);
|
||||
free(buf);
|
||||
|
||||
|
||||
// loading n and nbuckets
|
||||
DEBUGP("Reading n and nbuckets\n");
|
||||
nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
}
|
||||
|
||||
int chd_ph_dump(cmph_t *mphf, FILE *fd)
|
||||
@ -867,7 +867,7 @@ int chd_ph_dump(cmph_t *mphf, FILE *fd)
|
||||
cmph_uint32 buflen;
|
||||
register size_t nbytes;
|
||||
chd_ph_data_t *data = (chd_ph_data_t *)mphf->data;
|
||||
|
||||
|
||||
__cmph_dump(mphf, fd);
|
||||
|
||||
hash_state_dump(data->hl, &buf, &buflen);
|
||||
@ -906,11 +906,11 @@ cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
register cmph_uint32 disp,position;
|
||||
register cmph_uint32 probe0_num,probe1_num;
|
||||
register cmph_uint32 f,g,h;
|
||||
hash_vector(chd_ph->hl, key, keylen, hl);
|
||||
hash_vector(chd_ph->hl, key, keylen, hl);
|
||||
g = hl[0] % chd_ph->nbuckets;
|
||||
f = hl[1] % chd_ph->n;
|
||||
h = hl[2] % (chd_ph->n-1) + 1;
|
||||
|
||||
|
||||
disp = compressed_seq_query(chd_ph->cs, g);
|
||||
probe0_num = disp % chd_ph->n;
|
||||
probe1_num = disp/chd_ph->n;
|
||||
@ -949,10 +949,10 @@ void chd_ph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
/*
 * Compute the packed size, in bytes, of a CHD_PH function.
 *
 * The result is the sum of:
 *   - sizeof(CMPH_ALGO),
 *   - the packed size of the hash state for the type reported by data->hl,
 *   - the packed size of the compressed sequence data->cs,
 *   - three cmph_uint32 words.
 *
 * mphf->data is read as a chd_ph_data_t.
 */
cmph_uint32 chd_ph_packed_size(cmph_t *mphf)
{
	chd_ph_data_t *phf = (chd_ph_data_t *)mphf->data;
	CMPH_HASH hash_kind = hash_get_type(phf->hl);
	cmph_uint32 hash_bytes = hash_state_packed_size(hash_kind);
	cmph_uint32 seq_bytes = compressed_seq_packed_size(phf->cs);

	/* Accumulate in size_t (the type of sizeof), narrow once at the end. */
	size_t total = sizeof(CMPH_ALGO);
	total += hash_bytes;
	total += seq_bytes;
	total += 3 * sizeof(cmph_uint32);

	return (cmph_uint32)total;
}
|
||||
@ -961,28 +961,25 @@ cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
|
||||
{
|
||||
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
|
||||
|
||||
|
||||
register cmph_uint32 * ptr = (cmph_uint32 *)(hl_ptr + hash_state_packed_size(hl_type));
|
||||
register cmph_uint32 n = *ptr++;
|
||||
register cmph_uint32 nbuckets = *ptr++;
|
||||
cmph_uint32 hl[3];
|
||||
|
||||
|
||||
register cmph_uint32 disp,position;
|
||||
register cmph_uint32 probe0_num,probe1_num;
|
||||
register cmph_uint32 f,g,h;
|
||||
|
||||
|
||||
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
|
||||
|
||||
g = hl[0] % nbuckets;
|
||||
f = hl[1] % n;
|
||||
h = hl[2] % (n-1) + 1;
|
||||
|
||||
|
||||
disp = compressed_seq_query_packed(ptr, g);
|
||||
probe0_num = disp % n;
|
||||
probe1_num = disp/n;
|
||||
position = (cmph_uint32)((f + ((cmph_uint64 )h)*probe0_num + probe1_num) % n);
|
||||
return position;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
64
src/chm.c
64
src/chm.c
@ -21,7 +21,7 @@ chm_config_data_t *chm_config_new(void)
|
||||
{
|
||||
chm_config_data_t *chm = NULL;
|
||||
chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
|
||||
assert(chm);
|
||||
if (!chm) return NULL;
|
||||
memset(chm, 0, sizeof(chm_config_data_t));
|
||||
chm->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||
chm->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||
@ -45,7 +45,7 @@ void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 2) break; //chm only uses two hash functions
|
||||
chm->hashfuncs[i] = *hashptr;
|
||||
chm->hashfuncs[i] = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -61,7 +61,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
|
||||
chm_config_data_t *chm = (chm_config_data_t *)mph->data;
|
||||
chm->m = mph->key_source->nkeys;
|
||||
if (c == 0) c = 2.09;
|
||||
chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
|
||||
chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
|
||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
|
||||
chm->graph = graph_new(chm->n, chm->m);
|
||||
DEBUGP("Created graph\n");
|
||||
@ -92,12 +92,12 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
if (iterations == 0)
|
||||
{
|
||||
graph_destroy(chm->graph);
|
||||
graph_destroy(chm->graph);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -120,7 +120,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c)
|
||||
chm_traverse(chm, visited, i);
|
||||
}
|
||||
}
|
||||
graph_destroy(chm->graph);
|
||||
graph_destroy(chm->graph);
|
||||
free(visited);
|
||||
chm->graph = NULL;
|
||||
|
||||
@ -149,7 +149,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3
|
||||
graph_iterator_t it = graph_neighbors_it(chm->graph, v);
|
||||
cmph_uint32 neighbor = 0;
|
||||
SETBIT(visited,v);
|
||||
|
||||
|
||||
DEBUGP("Visiting vertex %u\n", v);
|
||||
while((neighbor = graph_next_neighbor(chm->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
||||
{
|
||||
@ -162,7 +162,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3
|
||||
chm_traverse(chm, visited, neighbor);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int chm_gen_edges(cmph_config_t *mph)
|
||||
{
|
||||
cmph_uint32 e;
|
||||
@ -170,7 +170,7 @@ static int chm_gen_edges(cmph_config_t *mph)
|
||||
int cycles = 0;
|
||||
|
||||
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
|
||||
graph_clear_edges(chm->graph);
|
||||
graph_clear_edges(chm->graph);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||
{
|
||||
@ -181,7 +181,7 @@ static int chm_gen_edges(cmph_config_t *mph)
|
||||
h1 = hash(chm->hashes[0], key, keylen) % chm->n;
|
||||
h2 = hash(chm->hashes[1], key, keylen) % chm->n;
|
||||
if (h1 == h2) if (++h2 >= chm->n) h2 = 0;
|
||||
if (h1 == h2)
|
||||
if (h1 == h2)
|
||||
{
|
||||
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
@ -205,7 +205,7 @@ int chm_dump(cmph_t *mphf, FILE *fd)
|
||||
cmph_uint32 two = 2; //number of hash functions
|
||||
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||
register size_t nbytes;
|
||||
|
||||
|
||||
__cmph_dump(mphf, fd);
|
||||
|
||||
nbytes = fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd);
|
||||
@ -223,7 +223,7 @@ int chm_dump(cmph_t *mphf, FILE *fd)
|
||||
|
||||
nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
|
||||
|
||||
|
||||
nbytes = fwrite(data->g, sizeof(cmph_uint32)*data->n, (size_t)1, fd);
|
||||
/* #ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
@ -260,8 +260,8 @@ void chm_load(FILE *f, cmph_t *mphf)
|
||||
}
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
|
||||
chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n);
|
||||
nbytes = fread(chm->g, chm->n*sizeof(cmph_uint32), (size_t)1, f);
|
||||
@ -272,7 +272,7 @@ void chm_load(FILE *f, cmph_t *mphf)
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
@ -287,7 +287,7 @@ cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
void chm_destroy(cmph_t *mphf)
|
||||
{
|
||||
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||
free(data->g);
|
||||
free(data->g);
|
||||
hash_state_destroy(data->hashes[0]);
|
||||
hash_state_destroy(data->hashes[1]);
|
||||
free(data->hashes);
|
||||
@ -298,7 +298,7 @@ void chm_destroy(cmph_t *mphf)
|
||||
/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void chm_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
@ -332,26 +332,26 @@ void chm_pack(cmph_t *mphf, void *packed_mphf)
|
||||
ptr += sizeof(data->m);
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 chm_packed_size(cmph_t *mphf)
|
||||
{
|
||||
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key length in bytes
|
||||
@ -366,16 +366,16 @@ cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||
h2_ptr += 4;
|
||||
|
||||
|
||||
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||
|
||||
register cmph_uint32 n = *g_ptr++;
|
||||
register cmph_uint32 m = *g_ptr++;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
|
||||
register cmph_uint32 n = *g_ptr++;
|
||||
register cmph_uint32 m = *g_ptr++;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
|
||||
return (g_ptr[h1] + g_ptr[h2]) % m;
|
||||
return (g_ptr[h1] + g_ptr[h2]) % m;
|
||||
}
|
||||
|
84
src/cmph.c
84
src/cmph.c
@ -1,10 +1,10 @@
|
||||
#include "cmph.h"
|
||||
#include "cmph_structs.h"
|
||||
#include "chm.h"
|
||||
#include "bmz.h"
|
||||
#include "bmz8.h"
|
||||
#include "brz.h"
|
||||
#include "fch.h"
|
||||
#include "bmz.h"
|
||||
#include "bmz8.h"
|
||||
#include "brz.h"
|
||||
#include "fch.h"
|
||||
#include "bdz.h"
|
||||
#include "bdz_ph.h"
|
||||
#include "chd_ph.h"
|
||||
@ -18,18 +18,18 @@
|
||||
|
||||
const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", "chd", NULL };
|
||||
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
void *vector;
|
||||
cmph_uint32 position; // access position when data is a vector
|
||||
cmph_uint32 position; // access position when data is a vector
|
||||
} cmph_vector_t;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
/**
|
||||
* Support a vector of struct as the source of keys.
|
||||
*
|
||||
* E.g. The keys could be the fieldB's in a vector of struct rec where
|
||||
* E.g. The keys could be the fieldB's in a vector of struct rec where
|
||||
* struct rec is defined as:
|
||||
* struct rec {
|
||||
* fieldA;
|
||||
@ -37,7 +37,7 @@ typedef struct
|
||||
* fieldC;
|
||||
* }
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
void *vector; /* Pointer to the vector of struct */
|
||||
cmph_uint32 position; /* current position */
|
||||
@ -61,7 +61,7 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
|
||||
while(1)
|
||||
{
|
||||
char buf[BUFSIZ];
|
||||
char *c = fgets(buf, BUFSIZ, fd);
|
||||
char *c = fgets(buf, BUFSIZ, fd);
|
||||
if (c == NULL) return -1;
|
||||
if (feof(fd)) return -1;
|
||||
*key = (char *)realloc(*key, *keylen + strlen(buf) + 1);
|
||||
@ -156,8 +156,12 @@ static cmph_uint32 count_nlfile_keys(FILE *fd)
|
||||
while(1)
|
||||
{
|
||||
char buf[BUFSIZ];
|
||||
ptr = fgets(buf, BUFSIZ, fd);
|
||||
ptr = fgets(buf, BUFSIZ, fd);
|
||||
if (feof(fd)) break;
|
||||
if (ferror(fd) || ptr == NULL) {
|
||||
perror("Error reading input file");
|
||||
return 0;
|
||||
}
|
||||
if (buf[strlen(buf) - 1] != '\n') continue;
|
||||
++count;
|
||||
}
|
||||
@ -264,12 +268,12 @@ cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 stru
|
||||
key_source->read = key_struct_vector_read;
|
||||
key_source->dispose = key_vector_dispose;
|
||||
key_source->rewind = key_struct_vector_rewind;
|
||||
return key_source;
|
||||
return key_source;
|
||||
}
|
||||
|
||||
/*
 * Tear down a key source by forwarding key_source, unchanged, to
 * cmph_io_struct_vector_destroy(). This function does nothing else.
 */
void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_struct_vector_destroy(key_source);
	return;
}
|
||||
|
||||
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
|
||||
@ -370,7 +374,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
||||
|
||||
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
|
||||
{
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
{
|
||||
brz_config_set_tmp_dir(mph, tmp_dir);
|
||||
}
|
||||
@ -379,7 +383,7 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
|
||||
|
||||
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
|
||||
{
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
{
|
||||
brz_config_set_mphf_fd(mph, mphf_fd);
|
||||
}
|
||||
@ -387,19 +391,19 @@ void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
|
||||
|
||||
void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
|
||||
{
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
{
|
||||
brz_config_set_b(mph, b);
|
||||
}
|
||||
else if (mph->algo == CMPH_BDZ)
|
||||
else if (mph->algo == CMPH_BDZ)
|
||||
{
|
||||
bdz_config_set_b(mph, b);
|
||||
}
|
||||
else if (mph->algo == CMPH_CHD_PH)
|
||||
else if (mph->algo == CMPH_CHD_PH)
|
||||
{
|
||||
chd_ph_config_set_b(mph, b);
|
||||
}
|
||||
else if (mph->algo == CMPH_CHD)
|
||||
else if (mph->algo == CMPH_CHD)
|
||||
{
|
||||
chd_config_set_b(mph, b);
|
||||
}
|
||||
@ -407,11 +411,11 @@ void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
|
||||
|
||||
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
|
||||
{
|
||||
if (mph->algo == CMPH_CHD_PH)
|
||||
if (mph->algo == CMPH_CHD_PH)
|
||||
{
|
||||
chd_ph_config_set_keys_per_bin(mph, keys_per_bin);
|
||||
}
|
||||
else if (mph->algo == CMPH_CHD)
|
||||
else if (mph->algo == CMPH_CHD)
|
||||
{
|
||||
chd_config_set_keys_per_bin(mph, keys_per_bin);
|
||||
}
|
||||
@ -419,7 +423,7 @@ void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
|
||||
|
||||
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
|
||||
{
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
if (mph->algo == CMPH_BRZ)
|
||||
{
|
||||
brz_config_set_memory_availability(mph, memory_availability);
|
||||
}
|
||||
@ -519,7 +523,7 @@ cmph_t *cmph_new(cmph_config_t *mph)
|
||||
double c = mph->c;
|
||||
|
||||
DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);
|
||||
switch (mph->algo)
|
||||
switch (mph->algo)
|
||||
{
|
||||
case CMPH_CHM:
|
||||
DEBUGP("Creating chm hash\n");
|
||||
@ -654,28 +658,28 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
case CMPH_CHM:
|
||||
return chm_search(mphf, key, keylen);
|
||||
case CMPH_BMZ: /* included -- Fabiano */
|
||||
DEBUGP("bmz algorithm search\n");
|
||||
DEBUGP("bmz algorithm search\n");
|
||||
return bmz_search(mphf, key, keylen);
|
||||
case CMPH_BMZ8: /* included -- Fabiano */
|
||||
DEBUGP("bmz8 algorithm search\n");
|
||||
DEBUGP("bmz8 algorithm search\n");
|
||||
return bmz8_search(mphf, key, keylen);
|
||||
case CMPH_BRZ: /* included -- Fabiano */
|
||||
DEBUGP("brz algorithm search\n");
|
||||
DEBUGP("brz algorithm search\n");
|
||||
return brz_search(mphf, key, keylen);
|
||||
case CMPH_FCH: /* included -- Fabiano */
|
||||
DEBUGP("fch algorithm search\n");
|
||||
DEBUGP("fch algorithm search\n");
|
||||
return fch_search(mphf, key, keylen);
|
||||
case CMPH_BDZ: /* included -- Fabiano */
|
||||
DEBUGP("bdz algorithm search\n");
|
||||
DEBUGP("bdz algorithm search\n");
|
||||
return bdz_search(mphf, key, keylen);
|
||||
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||
DEBUGP("bdz_ph algorithm search\n");
|
||||
DEBUGP("bdz_ph algorithm search\n");
|
||||
return bdz_ph_search(mphf, key, keylen);
|
||||
case CMPH_CHD_PH: /* included -- Fabiano */
|
||||
DEBUGP("chd_ph algorithm search\n");
|
||||
DEBUGP("chd_ph algorithm search\n");
|
||||
return chd_ph_search(mphf, key, keylen);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
DEBUGP("chd algorithm search\n");
|
||||
DEBUGP("chd algorithm search\n");
|
||||
return chd_search(mphf, key, keylen);
|
||||
default:
|
||||
assert(0);
|
||||
@ -688,7 +692,7 @@ cmph_uint32 cmph_size(cmph_t *mphf)
|
||||
{
|
||||
return mphf->size;
|
||||
}
|
||||
|
||||
|
||||
void cmph_destroy(cmph_t *mphf)
|
||||
{
|
||||
switch(mphf->algo)
|
||||
@ -720,7 +724,7 @@ void cmph_destroy(cmph_t *mphf)
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
chd_destroy(mphf);
|
||||
return;
|
||||
default:
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
assert(0);
|
||||
@ -731,12 +735,12 @@ void cmph_destroy(cmph_t *mphf)
|
||||
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void cmph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
// packing algorithm type to be used in cmph.c
|
||||
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
|
||||
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
|
||||
*ptr++ = mphf->algo;
|
||||
DEBUGP("mphf->algo = %u\n", mphf->algo);
|
||||
switch(mphf->algo)
|
||||
@ -768,7 +772,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
chd_pack(mphf, ptr);
|
||||
break;
|
||||
default:
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return;
|
||||
@ -778,7 +782,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 cmph_packed_size(cmph_t *mphf)
|
||||
{
|
||||
switch(mphf->algo)
|
||||
@ -801,14 +805,14 @@ cmph_uint32 cmph_packed_size(cmph_t *mphf)
|
||||
return chd_ph_packed_size(mphf);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
return chd_packed_size(mphf);
|
||||
default:
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return 0; // FAILURE
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key length in bytes
|
||||
@ -838,7 +842,7 @@ cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 k
|
||||
return chd_ph_search_packed(++ptr, key, keylen);
|
||||
case CMPH_CHD: /* included -- Fabiano */
|
||||
return chd_search_packed(++ptr, key, keylen);
|
||||
default:
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return 0; // FAILURE
|
||||
|
@ -28,7 +28,7 @@ void __cmph_dump(cmph_t *mphf, FILE *fd)
|
||||
nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd);
|
||||
nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd);
|
||||
}
|
||||
cmph_t *__cmph_load(FILE *f)
|
||||
cmph_t *__cmph_load(FILE *f)
|
||||
{
|
||||
cmph_t *mphf = NULL;
|
||||
cmph_uint32 i;
|
||||
@ -36,7 +36,7 @@ cmph_t *__cmph_load(FILE *f)
|
||||
char *ptr = algo_name;
|
||||
CMPH_ALGO algo = CMPH_COUNT;
|
||||
register size_t nbytes;
|
||||
|
||||
|
||||
DEBUGP("Loading mphf\n");
|
||||
while(1)
|
||||
{
|
||||
@ -52,7 +52,7 @@ cmph_t *__cmph_load(FILE *f)
|
||||
algo = i;
|
||||
}
|
||||
}
|
||||
if (algo == CMPH_COUNT)
|
||||
if (algo == CMPH_COUNT)
|
||||
{
|
||||
DEBUGP("Algorithm %s not found\n", algo_name);
|
||||
return NULL;
|
||||
@ -65,5 +65,3 @@ cmph_t *__cmph_load(FILE *f)
|
||||
|
||||
return mphf;
|
||||
}
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
djb2_state_t *djb2_state_new()
|
||||
{
|
||||
djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
|
||||
if (!djb2_state) return NULL;
|
||||
state->hashfunc = CMPH_HASH_DJB2;
|
||||
return state;
|
||||
}
|
||||
@ -18,7 +19,7 @@ cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen)
|
||||
register cmph_uint32 hash = 5381;
|
||||
const unsigned char *ptr = (unsigned char *)k;
|
||||
cmph_uint32 i = 0;
|
||||
while (i < keylen)
|
||||
while (i < keylen)
|
||||
{
|
||||
hash = hash*33 ^ *ptr;
|
||||
++ptr, ++i;
|
||||
|
89
src/fch.c
89
src/fch.c
@ -23,7 +23,7 @@ fch_config_data_t *fch_config_new()
|
||||
{
|
||||
fch_config_data_t *fch;
|
||||
fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t));
|
||||
assert(fch);
|
||||
if (!fch) return NULL;
|
||||
memset(fch, 0, sizeof(fch_config_data_t));
|
||||
fch->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||
fch->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||
@ -50,7 +50,7 @@ void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 2) break; //fch only uses two hash functions
|
||||
fch->hashfuncs[i] = *hashptr;
|
||||
fch->hashfuncs[i] = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -88,36 +88,36 @@ static fch_buckets_t * mapping(cmph_config_t *mph)
|
||||
fch_buckets_t *buckets = NULL;
|
||||
fch_config_data_t *fch = (fch_config_data_t *)mph->data;
|
||||
if (fch->h1) hash_state_destroy(fch->h1);
|
||||
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
|
||||
fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
|
||||
fch->b = fch_calc_b(fch->c, fch->m);
|
||||
fch->p1 = fch_calc_p1(fch->m);
|
||||
fch->p2 = fch_calc_p2(fch->b);
|
||||
//DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2);
|
||||
buckets = fch_buckets_new(fch->b);
|
||||
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
for(i = 0; i < fch->m; i++)
|
||||
{
|
||||
cmph_uint32 h1, keylen;
|
||||
char *key = NULL;
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
h1 = hash(fch->h1, key, keylen) % fch->m;
|
||||
h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
|
||||
fch_buckets_insert(buckets, h1, key, keylen);
|
||||
key = NULL; // transger memory ownership
|
||||
|
||||
|
||||
}
|
||||
return buckets;
|
||||
return buckets;
|
||||
}
|
||||
|
||||
|
||||
// returns the buckets indexes sorted by their sizes.
|
||||
// returns the buckets indexes sorted by their sizes.
|
||||
static cmph_uint32 * ordering(fch_buckets_t * buckets)
|
||||
{
|
||||
return fch_buckets_get_indexes_sorted_by_size(buckets);
|
||||
}
|
||||
|
||||
/* Check whether function h2 causes collisions among the keys of each bucket */
|
||||
/* Check whether function h2 causes collisions among the keys of each bucket */
|
||||
static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes)
|
||||
{
|
||||
//cmph_uint32 max_size = fch_buckets_get_max_size(buckets);
|
||||
@ -146,7 +146,7 @@ static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t
|
||||
}
|
||||
|
||||
static void permut(cmph_uint32 * vector, cmph_uint32 n)
|
||||
{
|
||||
{
|
||||
cmph_uint32 i, j, b;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = (cmph_uint32) rand() % n;
|
||||
@ -179,12 +179,12 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
|
||||
{
|
||||
map_table[random_table[i]] = i;
|
||||
}
|
||||
do {
|
||||
do {
|
||||
if (fch->h2) hash_state_destroy(fch->h2);
|
||||
fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
|
||||
fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
|
||||
restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
|
||||
filled_count = 0;
|
||||
if (!restart)
|
||||
if (!restart)
|
||||
{
|
||||
searching_iterations++; iteration_to_generate_h2 = 0;
|
||||
//DEBUGP("searching_iterations: %u\n", searching_iterations);
|
||||
@ -192,7 +192,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
|
||||
else {
|
||||
iteration_to_generate_h2++;
|
||||
//DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
|
||||
}
|
||||
}
|
||||
for(i = 0; (i < nbuckets) && !restart; i++) {
|
||||
cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]);
|
||||
if (bucketsize == 0)
|
||||
@ -204,8 +204,8 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
|
||||
for(z = 0; (z < (fch->m - filled_count)) && restart; z++) {
|
||||
char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX);
|
||||
cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX);
|
||||
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
|
||||
counter = 0;
|
||||
cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
|
||||
counter = 0;
|
||||
restart = 0; // false
|
||||
fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m;
|
||||
//DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]);
|
||||
@ -217,7 +217,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
|
||||
h2 = hash(fch->h2, key, keylen) % fch->m;
|
||||
index = (h2 + fch->g[sorted_indexes[i]]) % fch->m;
|
||||
//DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize);
|
||||
if (map_table[index] >= filled_count) {
|
||||
if (map_table[index] >= filled_count) {
|
||||
cmph_uint32 y = map_table[index];
|
||||
cmph_uint32 ry = random_table[y];
|
||||
random_table[y] = random_table[filled_count];
|
||||
@ -225,19 +225,19 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph
|
||||
map_table[random_table[y]] = y;
|
||||
map_table[random_table[filled_count]] = filled_count;
|
||||
filled_count++;
|
||||
counter ++;
|
||||
counter ++;
|
||||
}
|
||||
else {
|
||||
else {
|
||||
restart = 1; // true
|
||||
filled_count = filled_count - counter;
|
||||
counter = 0;
|
||||
counter = 0;
|
||||
break;
|
||||
}
|
||||
j = (j + 1) % bucketsize;
|
||||
} while(j % bucketsize != INDEX);
|
||||
} while(j % bucketsize != INDEX);
|
||||
}
|
||||
//getchar();
|
||||
}
|
||||
}
|
||||
} while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000));
|
||||
free(map_table);
|
||||
free(random_table);
|
||||
@ -264,7 +264,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c)
|
||||
fch->h2 = NULL;
|
||||
fch->g = NULL;
|
||||
do
|
||||
{
|
||||
{
|
||||
if (mph->verbosity)
|
||||
{
|
||||
fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m);
|
||||
@ -283,7 +283,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c)
|
||||
}
|
||||
restart_mapping = searching(fch, buckets, sorted_indexes);
|
||||
iterations--;
|
||||
|
||||
|
||||
} while(restart_mapping && iterations > 0);
|
||||
if (buckets) fch_buckets_destroy(buckets);
|
||||
if (sorted_indexes) free (sorted_indexes);
|
||||
@ -317,7 +317,7 @@ int fch_dump(cmph_t *mphf, FILE *fd)
|
||||
char *buf = NULL;
|
||||
cmph_uint32 buflen;
|
||||
register size_t nbytes;
|
||||
|
||||
|
||||
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||
__cmph_dump(mphf, fd);
|
||||
|
||||
@ -365,7 +365,7 @@ void fch_load(FILE *f, cmph_t *mphf)
|
||||
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
|
||||
fch->h1 = hash_state_load(buf, buflen);
|
||||
free(buf);
|
||||
|
||||
|
||||
//DEBUGP("Loading fch mphf\n");
|
||||
mphf->data = fch;
|
||||
//DEBUGP("Reading h2\n");
|
||||
@ -376,8 +376,8 @@ void fch_load(FILE *f, cmph_t *mphf)
|
||||
nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
|
||||
fch->h2 = hash_state_load(buf, buflen);
|
||||
free(buf);
|
||||
|
||||
|
||||
|
||||
|
||||
//DEBUGP("Reading m and n\n");
|
||||
nbytes = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f);
|
||||
nbytes = fread(&(fch->c), sizeof(double), (size_t)1, f);
|
||||
@ -418,7 +418,7 @@ void fch_destroy(cmph_t *mphf)
|
||||
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
|
||||
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||
* \param mphf pointer to the resulting mphf
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||
*/
|
||||
void fch_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
@ -450,37 +450,37 @@ void fch_pack(cmph_t *mphf, void *packed_mphf)
|
||||
// packing b
|
||||
*((cmph_uint32 *) ptr) = data->b;
|
||||
ptr += sizeof(data->b);
|
||||
|
||||
|
||||
// packing p1
|
||||
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p1;
|
||||
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p1;
|
||||
ptr += sizeof(data->p1);
|
||||
|
||||
// packing p2
|
||||
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p2;
|
||||
*((cmph_uint64 *)ptr) = (cmph_uint64)data->p2;
|
||||
ptr += sizeof(data->p2);
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
|
||||
* \brief Return the amount of space needed to pack mphf.
|
||||
* \param mphf pointer to a mphf
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 fch_packed_size(cmph_t *mphf)
|
||||
{
|
||||
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1);
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2);
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1);
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2);
|
||||
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
4*sizeof(cmph_uint32) + 2*sizeof(double) + sizeof(cmph_uint32)*(data->b));
|
||||
}
|
||||
|
||||
|
||||
/** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \brief Use the packed mphf to do a search.
|
||||
* \param packed_mphf pointer to the packed mphf
|
||||
* \param key key to be hashed
|
||||
* \param keylen key length in bytes
|
||||
@ -495,12 +495,12 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||
h2_ptr += 4;
|
||||
|
||||
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||
|
||||
register cmph_uint32 m = *g_ptr++;
|
||||
|
||||
register cmph_uint32 b = *g_ptr++;
|
||||
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||
|
||||
register cmph_uint32 m = *g_ptr++;
|
||||
|
||||
register cmph_uint32 b = *g_ptr++;
|
||||
|
||||
register double p1 = (double)(*((cmph_uint64 *)g_ptr));
|
||||
g_ptr += 2;
|
||||
@ -508,10 +508,9 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
||||
register double p2 = (double)(*((cmph_uint64 *)g_ptr));
|
||||
g_ptr += 2;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
|
||||
|
||||
h1 = mixh10h11h12 (b, p1, p2, h1);
|
||||
return (h2 + g_ptr[h1]) % m;
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ typedef struct __fch_bucket_t
|
||||
|
||||
|
||||
|
||||
static void fch_bucket_new(fch_bucket_t *bucket)
|
||||
static void fch_bucket_new(fch_bucket_t *bucket)
|
||||
{
|
||||
assert(bucket);
|
||||
bucket->size = 0;
|
||||
@ -109,16 +109,16 @@ struct __fch_buckets_t
|
||||
{
|
||||
fch_bucket_t * values;
|
||||
cmph_uint32 nbuckets, max_size;
|
||||
|
||||
|
||||
};
|
||||
|
||||
fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets)
|
||||
{
|
||||
cmph_uint32 i;
|
||||
fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t));
|
||||
assert(buckets);
|
||||
if (!buckets) return NULL;
|
||||
buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t));
|
||||
for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
|
||||
for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
|
||||
assert(buckets->values);
|
||||
buckets->nbuckets = nbuckets;
|
||||
buckets->max_size = 0;
|
||||
@ -135,7 +135,7 @@ void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key,
|
||||
{
|
||||
assert(index < buckets->nbuckets);
|
||||
fch_bucket_insert(buckets->values + index, key, length);
|
||||
if (fch_bucket_size(buckets->values + index) > buckets->max_size)
|
||||
if (fch_bucket_size(buckets->values + index) > buckets->max_size)
|
||||
{
|
||||
buckets->max_size = fch_bucket_size(buckets->values + index);
|
||||
}
|
||||
@ -170,16 +170,16 @@ cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets)
|
||||
return buckets->nbuckets;
|
||||
}
|
||||
|
||||
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
|
||||
cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
|
||||
{
|
||||
cmph_uint32 i = 0;
|
||||
cmph_uint32 sum = 0, value;
|
||||
cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32));
|
||||
cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32));
|
||||
|
||||
|
||||
// collect how many buckets for each size.
|
||||
for(i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
|
||||
|
||||
|
||||
// calculating offset considering a decreasing order of buckets size.
|
||||
value = nbuckets_size[buckets->max_size];
|
||||
nbuckets_size[buckets->max_size] = sum;
|
||||
@ -188,13 +188,13 @@ cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets)
|
||||
sum += value;
|
||||
value = nbuckets_size[i];
|
||||
nbuckets_size[i] = sum;
|
||||
|
||||
|
||||
}
|
||||
for(i = 0; i < buckets->nbuckets; i++)
|
||||
for(i = 0; i < buckets->nbuckets; i++)
|
||||
{
|
||||
sorted_indexes[nbuckets_size[fch_bucket_size(buckets->values + i)]] = (cmph_uint32)i;
|
||||
nbuckets_size[fch_bucket_size(buckets->values + i)] ++;
|
||||
}
|
||||
}
|
||||
free(nbuckets_size);
|
||||
return sorted_indexes;
|
||||
}
|
||||
@ -208,7 +208,7 @@ void fch_buckets_print(fch_buckets_t * buckets)
|
||||
/**
 * Release a bucket collection: every individual bucket first, then the
 * array holding the buckets, then the container itself.
 *
 * \param buckets collection to release; it is dereferenced unconditionally,
 *                so it must not be NULL.
 */
void fch_buckets_destroy(fch_buckets_t * buckets)
{
	cmph_uint32 i;
	/* Destroy each bucket exactly once, before the array that stores them
	 * is handed back to the allocator. */
	for (i = 0; i < buckets->nbuckets; i++) fch_bucket_destroy(buckets->values + i);
	free(buckets->values);
	free(buckets);
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
fnv_state_t *fnv_state_new()
|
||||
{
|
||||
fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
|
||||
if (!state) return NULL;
|
||||
state->hashfunc = CMPH_HASH_FNV;
|
||||
return state;
|
||||
}
|
||||
@ -15,13 +16,13 @@ void fnv_state_destroy(fnv_state_t *state)
|
||||
|
||||
cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen)
|
||||
{
|
||||
const unsigned char *bp = (const unsigned char *)k;
|
||||
const unsigned char *be = bp + keylen;
|
||||
static unsigned int hval = 0;
|
||||
const unsigned char *bp = (const unsigned char *)k;
|
||||
const unsigned char *be = bp + keylen;
|
||||
static unsigned int hval = 0;
|
||||
|
||||
while (bp < be)
|
||||
while (bp < be)
|
||||
{
|
||||
|
||||
|
||||
//hval *= 0x01000193; good for non-gcc compiler
|
||||
hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); //good for gcc
|
||||
|
||||
@ -41,6 +42,7 @@ void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||
/* Duplicate a fnv hash state into newly allocated memory.
 * Only the hashfunc field is carried over; yields NULL when malloc fails. */
fnv_state_t * fnv_state_copy(fnv_state_t *src_state)
{
	fnv_state_t *clone = (fnv_state_t *)malloc(sizeof(fnv_state_t));
	if (clone != NULL)
	{
		clone->hashfunc = src_state->hashfunc;
	}
	return clone;
}
|
||||
|
30
src/graph.c
30
src/graph.c
@ -77,7 +77,7 @@ void graph_print(graph_t *g)
|
||||
printf("%u -> %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -130,7 +130,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||
|
||||
DEBUGP("Deleting edge point %u %u\n", v1, v2);
|
||||
e = g->first[v1];
|
||||
if (check_edge(g, e, v1, v2))
|
||||
if (check_edge(g, e, v1, v2))
|
||||
{
|
||||
g->first[v1] = g->next[e];
|
||||
//g->edges[e] = EMPTY;
|
||||
@ -151,7 +151,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||
DEBUGP("Deleted\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||
{
|
||||
g->shrinking = 1;
|
||||
@ -163,7 +163,7 @@ void graph_clear_edges(graph_t *g)
|
||||
{
|
||||
cmph_uint32 i;
|
||||
for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY;
|
||||
for (i = 0; i < g->nedges*2; ++i)
|
||||
for (i = 0; i < g->nedges*2; ++i)
|
||||
{
|
||||
g->edges[i] = EMPTY;
|
||||
g->next[i] = EMPTY;
|
||||
@ -178,7 +178,7 @@ static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *delet
|
||||
cmph_uint8 found = 0;
|
||||
DEBUGP("Checking degree of vertex %u connected to edge %u\n", v, edge);
|
||||
if (edge == EMPTY) return 0;
|
||||
else if (!(GETBIT(deleted, abs_edge(edge, 0))))
|
||||
else if (!(GETBIT(deleted, abs_edge(edge, 0))))
|
||||
{
|
||||
found = 1;
|
||||
*e = edge;
|
||||
@ -206,17 +206,17 @@ static void cyclic_del_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted)
|
||||
|
||||
degree1 = find_degree1_edge(g, v1, deleted, &e);
|
||||
if (!degree1) return;
|
||||
while(1)
|
||||
while(1)
|
||||
{
|
||||
DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]);
|
||||
SETBIT(deleted, abs_edge(e, 0));
|
||||
|
||||
|
||||
v2 = g->edges[abs_edge(e, 0)];
|
||||
if (v2 == v1) v2 = g->edges[abs_edge(e, 1)];
|
||||
|
||||
DEBUGP("Checking if second endpoint %u has degree 1\n", v2);
|
||||
DEBUGP("Checking if second endpoint %u has degree 1\n", v2);
|
||||
degree1 = find_degree1_edge(g, v2, deleted, &e);
|
||||
if (degree1)
|
||||
if (degree1)
|
||||
{
|
||||
DEBUGP("Inspecting vertex %u\n", v2);
|
||||
v1 = v2;
|
||||
@ -240,7 +240,7 @@ int graph_is_cyclic(graph_t *g)
|
||||
}
|
||||
for (i = 0; i < g->nedges; ++i)
|
||||
{
|
||||
if (!(GETBIT(deleted, i)))
|
||||
if (!(GETBIT(deleted, i)))
|
||||
{
|
||||
DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]);
|
||||
free(deleted);
|
||||
@ -275,15 +275,15 @@ void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
|
||||
|
||||
for (i = 0; i < g->nedges; ++i)
|
||||
{
|
||||
if (!(GETBIT(deleted,i)))
|
||||
if (!(GETBIT(deleted,i)))
|
||||
{
|
||||
DEBUGP("Edge %u %u->%u belongs to the 2-core\n", i, g->edges[i], g->edges[i + g->nedges]);
|
||||
if(!(GETBIT(g->critical_nodes,g->edges[i])))
|
||||
if(!(GETBIT(g->critical_nodes,g->edges[i])))
|
||||
{
|
||||
g->ncritical_nodes ++;
|
||||
SETBIT(g->critical_nodes,g->edges[i]);
|
||||
}
|
||||
if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges])))
|
||||
if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges])))
|
||||
{
|
||||
g->ncritical_nodes ++;
|
||||
SETBIT(g->critical_nodes,g->edges[i + g->nedges]);
|
||||
@ -328,11 +328,9 @@ graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v)
|
||||
/* Advance a neighbor iterator by one step.
 * Returns GRAPH_NO_NEIGHBOR once the iterator's edge is EMPTY; otherwise
 * returns the endpoint of the current edge that is not it->vertex and moves
 * the iterator on to g->next[] of that edge. */
cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
{
	cmph_uint32 neighbor;
	if (it->edge == EMPTY) return GRAPH_NO_NEIGHBOR;
	/* The two endpoints of an edge are stored nedges slots apart in g->edges. */
	neighbor = (g->edges[it->edge] != it->vertex)
		? g->edges[it->edge]
		: g->edges[it->edge + g->nedges];
	it->edge = g->next[it->edge];
	return neighbor;
}
|
||||
|
||||
|
||||
|
10
src/hash.c
10
src/hash.c
@ -133,7 +133,7 @@ void hash_state_destroy(hash_state_t *state)
|
||||
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||
* \param state points to the hash function
|
||||
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||
*
|
||||
*
|
||||
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||
* However, the hash function type must be packed outside.
|
||||
*/
|
||||
@ -142,20 +142,20 @@ void hash_state_pack(hash_state_t *state, void *hash_packed)
|
||||
switch (state->hashfunc)
|
||||
{
|
||||
case CMPH_HASH_JENKINS:
|
||||
// pack the jenkins hash function
|
||||
// pack the jenkins hash function
|
||||
jenkins_state_pack((jenkins_state_t *)state, hash_packed);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||
* \brief Return the amount of space needed to pack a hash function.
|
||||
* \param hashfunc function type
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||
{
|
||||
cmph_uint32 size = 0;
|
||||
@ -197,7 +197,7 @@ cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cm
|
||||
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||
*/
|
||||
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||
{
|
||||
{
|
||||
switch (hashfunc)
|
||||
{
|
||||
case CMPH_HASH_JENKINS:
|
||||
|
@ -41,7 +41,7 @@ void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
||||
while(*hashptr != CMPH_HASH_COUNT)
|
||||
{
|
||||
if (i >= 3) break; //hashtree only uses three hash functions
|
||||
hashtree->hashfuncs[i] = *hashptr;
|
||||
hashtree->hashfuncs[i] = *hashptr;
|
||||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
@ -55,8 +55,8 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
|
||||
cmph_uint32 iterations = 20;
|
||||
cmph_uint8 *visited = NULL;
|
||||
hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data;
|
||||
hashtree->m = mph->key_source->nkeys;
|
||||
hashtree->n = ceil(c * mph->key_source->nkeys);
|
||||
hashtree->m = mph->key_source->nkeys;
|
||||
hashtree->n = ceil(c * mph->key_source->nkeys);
|
||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c);
|
||||
hashtree->graph = graph_new(hashtree->n, hashtree->m);
|
||||
DEBUGP("Created graph\n");
|
||||
@ -87,12 +87,12 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
|
||||
fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
|
||||
}
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
if (iterations == 0)
|
||||
{
|
||||
graph_destroy(hashtree->graph);
|
||||
graph_destroy(hashtree->graph);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -115,7 +115,7 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c)
|
||||
hashtree_traverse(hashtree, visited, i);
|
||||
}
|
||||
}
|
||||
graph_destroy(hashtree->graph);
|
||||
graph_destroy(hashtree->graph);
|
||||
free(visited);
|
||||
hashtree->graph = NULL;
|
||||
|
||||
@ -144,7 +144,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi
|
||||
graph_iterator_t it = graph_neighbors_it(hashtree->graph, v);
|
||||
cmph_uint32 neighbor = 0;
|
||||
SETBIT(visited,v);
|
||||
|
||||
|
||||
DEBUGP("Visiting vertex %u\n", v);
|
||||
while((neighbor = graph_next_neighbor(hashtree->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
||||
{
|
||||
@ -157,7 +157,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi
|
||||
hashtree_traverse(hashtree, visited, neighbor);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int hashtree_gen_edges(cmph_config_t *mph)
|
||||
{
|
||||
cmph_uint32 e;
|
||||
@ -165,7 +165,7 @@ static int hashtree_gen_edges(cmph_config_t *mph)
|
||||
int cycles = 0;
|
||||
|
||||
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", hashtree->n, cmph_hash_names[hashtree->hashfuncs[0]], cmph_hash_names[hashtree->hashfuncs[1]]);
|
||||
graph_clear_edges(hashtree->graph);
|
||||
graph_clear_edges(hashtree->graph);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||
{
|
||||
@ -176,7 +176,7 @@ static int hashtree_gen_edges(cmph_config_t *mph)
|
||||
h1 = hash(hashtree->hashes[0], key, keylen) % hashtree->n;
|
||||
h2 = hash(hashtree->hashes[1], key, keylen) % hashtree->n;
|
||||
if (h1 == h2) if (++h2 >= hashtree->n) h2 = 0;
|
||||
if (h1 == h2)
|
||||
if (h1 == h2)
|
||||
{
|
||||
if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
@ -216,7 +216,7 @@ int hashtree_dump(cmph_t *mphf, FILE *fd)
|
||||
|
||||
fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
|
||||
|
||||
|
||||
fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd);
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
@ -253,8 +253,8 @@ void hashtree_load(FILE *f, cmph_t *mphf)
|
||||
}
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
fread(&(hashtree->n), sizeof(cmph_uint32), 1, f);
|
||||
fread(&(hashtree->m), sizeof(cmph_uint32), 1, f);
|
||||
fread(&(hashtree->n), sizeof(cmph_uint32), 1, f);
|
||||
fread(&(hashtree->m), sizeof(cmph_uint32), 1, f);
|
||||
|
||||
hashtree->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*hashtree->n);
|
||||
fread(hashtree->g, hashtree->n*sizeof(cmph_uint32), 1, f);
|
||||
@ -265,7 +265,7 @@ void hashtree_load(FILE *f, cmph_t *mphf)
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
@ -280,7 +280,7 @@ cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
void hashtree_destroy(cmph_t *mphf)
|
||||
{
|
||||
hashtree_data_t *data = (hashtree_data_t *)mphf->data;
|
||||
free(data->g);
|
||||
free(data->g);
|
||||
hash_state_destroy(data->hashes[0]);
|
||||
hash_state_destroy(data->hashes[1]);
|
||||
free(data->hashes);
|
||||
|
@ -28,16 +28,16 @@
|
||||
have at least 1/4 probability of changing.
|
||||
* If mix() is run forward, every bit of c will change between 1/3 and
|
||||
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
||||
mix() was built out of 36 single-cycle latency instructions in a
|
||||
mix() was built out of 36 single-cycle latency instructions in a
|
||||
structure that could support 2x parallelism, like so:
|
||||
a -= b;
|
||||
a -= b;
|
||||
a -= c; x = (c>>13);
|
||||
b -= c; a ^= x;
|
||||
b -= a; x = (a<<8);
|
||||
c -= a; b ^= x;
|
||||
c -= b; x = (b>>13);
|
||||
...
|
||||
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
|
||||
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
|
||||
of that parallelism. They've also turned some of those single-cycle
|
||||
latency instructions into multi-cycle latency instructions. Still,
|
||||
this is the fastest good hash I could find. There were about 2^^68
|
||||
@ -87,6 +87,7 @@ acceptable. Do NOT use for cryptographic purposes.
|
||||
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
|
||||
{
|
||||
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
|
||||
if (!state) return NULL;
|
||||
DEBUGP("Initializing jenkins hash\n");
|
||||
state->seed = ((cmph_uint32)rand() % size);
|
||||
return state;
|
||||
@ -121,28 +122,28 @@ static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_u
|
||||
hashes[2] += length;
|
||||
switch(len) /* all the case statements fall through */
|
||||
{
|
||||
case 11:
|
||||
case 11:
|
||||
hashes[2] +=((cmph_uint32)k[10]<<24);
|
||||
case 10:
|
||||
case 10:
|
||||
hashes[2] +=((cmph_uint32)k[9]<<16);
|
||||
case 9 :
|
||||
case 9 :
|
||||
hashes[2] +=((cmph_uint32)k[8]<<8);
|
||||
/* the first byte of hashes[2] is reserved for the length */
|
||||
case 8 :
|
||||
case 8 :
|
||||
hashes[1] +=((cmph_uint32)k[7]<<24);
|
||||
case 7 :
|
||||
case 7 :
|
||||
hashes[1] +=((cmph_uint32)k[6]<<16);
|
||||
case 6 :
|
||||
case 6 :
|
||||
hashes[1] +=((cmph_uint32)k[5]<<8);
|
||||
case 5 :
|
||||
hashes[1] +=(cmph_uint8) k[4];
|
||||
case 4 :
|
||||
case 4 :
|
||||
hashes[0] +=((cmph_uint32)k[3]<<24);
|
||||
case 3 :
|
||||
case 3 :
|
||||
hashes[0] +=((cmph_uint32)k[2]<<16);
|
||||
case 2 :
|
||||
case 2 :
|
||||
hashes[0] +=((cmph_uint32)k[1]<<8);
|
||||
case 1 :
|
||||
case 1 :
|
||||
hashes[0] +=(cmph_uint8)k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
@ -158,13 +159,13 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
|
||||
/* cmph_uint32 a, b, c;
|
||||
cmph_uint32 len, length;
|
||||
|
||||
// Set up the internal state
|
||||
// Set up the internal state
|
||||
length = keylen;
|
||||
len = length;
|
||||
a = b = 0x9e3779b9; // the golden ratio; an arbitrary value
|
||||
c = state->seed; // the previous hash value - seed in our case
|
||||
a = b = 0x9e3779b9; // the golden ratio; an arbitrary value
|
||||
c = state->seed; // the previous hash value - seed in our case
|
||||
|
||||
// handle most of the key
|
||||
// handle most of the key
|
||||
while (len >= 12)
|
||||
{
|
||||
a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
|
||||
@ -176,37 +177,37 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
|
||||
|
||||
// handle the last 11 bytes
|
||||
c += length;
|
||||
switch(len) /// all the case statements fall through
|
||||
switch(len) /// all the case statements fall through
|
||||
{
|
||||
case 11:
|
||||
case 11:
|
||||
c +=((cmph_uint32)k[10]<<24);
|
||||
case 10:
|
||||
case 10:
|
||||
c +=((cmph_uint32)k[9]<<16);
|
||||
case 9 :
|
||||
case 9 :
|
||||
c +=((cmph_uint32)k[8]<<8);
|
||||
// the first byte of c is reserved for the length
|
||||
case 8 :
|
||||
// the first byte of c is reserved for the length
|
||||
case 8 :
|
||||
b +=((cmph_uint32)k[7]<<24);
|
||||
case 7 :
|
||||
case 7 :
|
||||
b +=((cmph_uint32)k[6]<<16);
|
||||
case 6 :
|
||||
case 6 :
|
||||
b +=((cmph_uint32)k[5]<<8);
|
||||
case 5 :
|
||||
case 5 :
|
||||
b +=k[4];
|
||||
case 4 :
|
||||
case 4 :
|
||||
a +=((cmph_uint32)k[3]<<24);
|
||||
case 3 :
|
||||
case 3 :
|
||||
a +=((cmph_uint32)k[2]<<16);
|
||||
case 2 :
|
||||
case 2 :
|
||||
a +=((cmph_uint32)k[1]<<8);
|
||||
case 1 :
|
||||
case 1 :
|
||||
a +=k[0];
|
||||
// case 0: nothing left to add
|
||||
// case 0: nothing left to add
|
||||
}
|
||||
|
||||
mix(a,b,c);
|
||||
|
||||
/// report the result
|
||||
/// report the result
|
||||
|
||||
return c;
|
||||
*/
|
||||
@ -221,7 +222,7 @@ void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||
{
|
||||
*buflen = sizeof(cmph_uint32);
|
||||
*buf = (char *)malloc(sizeof(cmph_uint32));
|
||||
if (!*buf)
|
||||
if (!*buf)
|
||||
{
|
||||
*buflen = UINT_MAX;
|
||||
return;
|
||||
@ -252,7 +253,7 @@ jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
|
||||
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
|
||||
* \param state points to the jenkins function
|
||||
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
|
||||
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
|
||||
*/
|
||||
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
|
||||
{
|
||||
@ -265,7 +266,7 @@ void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
|
||||
/** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state);
|
||||
* \brief Return the amount of space needed to pack a jenkins function.
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
*/
|
||||
cmph_uint32 jenkins_state_packed_size(void)
|
||||
{
|
||||
return sizeof(cmph_uint32);
|
||||
|
@ -12,6 +12,7 @@ struct __linear_string_map_t {
|
||||
|
||||
lsmap_t *lsmap_new() {
|
||||
lsmap_t* lsmap = (lsmap_t*)malloc(sizeof(lsmap_t));
|
||||
if (!lsmap) return NULL;
|
||||
lsmap->key = "dummy node";
|
||||
lsmap->next = NULL;
|
||||
return lsmap;
|
||||
@ -42,7 +43,7 @@ void* lsmap_search(lsmap_t *lsmap, const char *key) {
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) {
|
||||
while (lsmap->next != NULL) {
|
||||
f(lsmap->key);
|
||||
@ -65,4 +66,3 @@ void lsmap_destroy(lsmap_t *lsmap) {
|
||||
}
|
||||
free(lsmap);
|
||||
}
|
||||
|
||||
|
26
src/main.c
26
src/main.c
@ -22,13 +22,13 @@
|
||||
|
||||
void usage(const char *prg)
|
||||
{
|
||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
|
||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
|
||||
}
|
||||
void usage_long(const char *prg)
|
||||
{
|
||||
cmph_uint32 i;
|
||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
|
||||
fprintf(stderr, "Minimum perfect hashing tool\n\n");
|
||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
|
||||
fprintf(stderr, "Minimum perfect hashing tool\n\n");
|
||||
fprintf(stderr, " -h\t print this help message\n");
|
||||
fprintf(stderr, " -c\t c value determines:\n");
|
||||
fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n");
|
||||
@ -57,7 +57,7 @@ void usage_long(const char *prg)
|
||||
fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n");
|
||||
fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n");
|
||||
fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n");
|
||||
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
|
||||
fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n");
|
||||
fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n");
|
||||
fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n");
|
||||
fprintf(stderr, " \t range [1,128]. Defaul is 1\n");
|
||||
@ -182,7 +182,7 @@ int main(int argc, char **argv)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!valid)
|
||||
if (!valid)
|
||||
{
|
||||
fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
|
||||
return -1;
|
||||
@ -204,7 +204,7 @@ int main(int argc, char **argv)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!valid)
|
||||
if (!valid)
|
||||
{
|
||||
fprintf(stderr, "Invalid hash function: %s\n", optarg);
|
||||
return -1;
|
||||
@ -223,7 +223,7 @@ int main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
keys_file = argv[optind];
|
||||
|
||||
|
||||
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
||||
srand(seed);
|
||||
int ret = 0;
|
||||
@ -232,7 +232,7 @@ int main(int argc, char **argv)
|
||||
mphf_file = (char *)malloc(strlen(keys_file) + 5);
|
||||
memcpy(mphf_file, keys_file, strlen(keys_file));
|
||||
memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
|
||||
}
|
||||
}
|
||||
|
||||
keys_fd = fopen(keys_file, "r");
|
||||
|
||||
@ -258,7 +258,7 @@ int main(int argc, char **argv)
|
||||
cmph_config_set_memory_availability(config, memory_availability);
|
||||
cmph_config_set_b(config, b);
|
||||
cmph_config_set_keys_per_bin(config, keys_per_bin);
|
||||
|
||||
|
||||
//if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15;
|
||||
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
|
||||
if (c != 0) cmph_config_set_graphsize(config, c);
|
||||
@ -279,8 +279,8 @@ int main(int argc, char **argv)
|
||||
free(mphf_file);
|
||||
return -1;
|
||||
}
|
||||
cmph_dump(mphf, mphf_fd);
|
||||
cmph_destroy(mphf);
|
||||
cmph_dump(mphf, mphf_fd);
|
||||
cmph_destroy(mphf);
|
||||
fclose(mphf_fd);
|
||||
}
|
||||
else
|
||||
@ -329,7 +329,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
source->dispose(source->data, buf, buflen);
|
||||
}
|
||||
|
||||
|
||||
cmph_destroy(mphf);
|
||||
free(hashtable);
|
||||
}
|
||||
@ -338,5 +338,5 @@ int main(int argc, char **argv)
|
||||
free(tmp_dir);
|
||||
cmph_io_nlfile_adapter_destroy(source);
|
||||
return ret;
|
||||
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
sdbm_state_t *sdbm_state_new()
|
||||
{
|
||||
sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
|
||||
if (!state) return NULL;
|
||||
state->hashfunc = CMPH_HASH_SDBM;
|
||||
return state;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ vqueue_t * vqueue_new(cmph_uint32 capacity)
|
||||
{
|
||||
size_t capacity_plus_one = capacity + 1;
|
||||
vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
|
||||
assert(q);
|
||||
if (!q) return NULL;
|
||||
q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32));
|
||||
q->beg = q->end = 0;
|
||||
q->capacity = (cmph_uint32) capacity_plus_one;
|
||||
@ -43,7 +43,7 @@ void vqueue_print(vqueue_t * q)
|
||||
cmph_uint32 i;
|
||||
for (i = q->beg; i != q->end; i = (i + 1)%q->capacity)
|
||||
fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]);
|
||||
}
|
||||
}
|
||||
|
||||
void vqueue_destroy(vqueue_t *q)
|
||||
{
|
||||
|
@ -76,4 +76,3 @@ void vstack_reserve(vstack_t *stack, cmph_uint32 size)
|
||||
DEBUGP("Increased\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user