diff --git a/src/bdz.c b/src/bdz.c index 7629a6c..2c0de90 100755 --- a/src/bdz.c +++ b/src/bdz.c @@ -35,9 +35,9 @@ const cmph_uint8 bdz_lookup_table[] = 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0 -}; +}; -typedef struct +typedef struct { cmph_uint32 vertices[3]; cmph_uint32 next_edges[3]; @@ -54,12 +54,12 @@ static void bdz_free_queue(bdz_queue_t * queue) free(*queue); }; -typedef struct +typedef struct { cmph_uint32 nedges; bdz_edge_t * edges; cmph_uint32 * first_edge; - cmph_uint8 * vert_degree; + cmph_uint8 * vert_degree; }bdz_graph3_t; @@ -67,7 +67,7 @@ static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uin { graph3->edges=malloc(nedges*sizeof(bdz_edge_t)); graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32)); - graph3->vert_degree=malloc((size_t)nvertices); + graph3->vert_degree=malloc((size_t)nvertices); }; static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices) { @@ -136,7 +136,7 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge) j=0; } else if(graph3->edges[edge1].vertices[1]==vert){ j=1; - } else + } else j=2; edge1=graph3->edges[edge1].next_edges[j]; }; @@ -145,16 +145,16 @@ static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge) bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4); exit(-1); }; - + if(edge2!=NULL_EDGE){ - graph3->edges[edge2].next_edges[j] = + graph3->edges[edge2].next_edges[j] = graph3->edges[edge1].next_edges[i]; - } else + } else graph3->first_edge[vert]= graph3->edges[edge1].next_edges[i]; graph3->vert_degree[vert]--; }; - + }; static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3) @@ -170,7 +170,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que v0=graph3->edges[i].vertices[0]; v1=graph3->edges[i].vertices[1]; 
v2=graph3->edges[i].vertices[2]; - if(graph3->vert_degree[v0]==1 || + if(graph3->vert_degree[v0]==1 || graph3->vert_degree[v1]==1 || graph3->vert_degree[v2]==1){ if(!GETBIT(marked_edge,i)) { @@ -196,7 +196,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que queue[queue_head++]=tmp_edge; SETBIT(marked_edge,tmp_edge); }; - + }; if(graph3->vert_degree[v1]==1) { tmp_edge=graph3->first_edge[v1]; @@ -204,7 +204,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que queue[queue_head++]=tmp_edge; SETBIT(marked_edge,tmp_edge); }; - + }; if(graph3->vert_degree[v2]==1){ tmp_edge=graph3->first_edge[v2]; @@ -227,7 +227,7 @@ bdz_config_data_t *bdz_config_new(void) { bdz_config_data_t *bdz; bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t)); - assert(bdz); + if (!bdz) return NULL; memset(bdz, 0, sizeof(bdz_config_data_t)); bdz->hashfunc = CMPH_HASH_JENKINS; bdz->g = NULL; @@ -328,10 +328,10 @@ cmph_t *bdz_new(cmph_config_t *mph, double c) fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } + } else break; } - + if (iterations == 0) { bdz_free_queue(&edges); @@ -353,7 +353,7 @@ cmph_t *bdz_new(cmph_config_t *mph, double c) fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n); } ranking(bdz); - #ifdef CMPH_TIMING + #ifdef CMPH_TIMING ELAPSED_TIME_IN_SECONDS(&construction_time); #endif mphf = (cmph_t *)malloc(sizeof(cmph_t)); @@ -381,17 +381,17 @@ cmph_t *bdz_new(cmph_config_t *mph, double c) } - #ifdef CMPH_TIMING + #ifdef CMPH_TIMING register cmph_uint32 space_usage = bdz_packed_size(mphf)*8; register cmph_uint32 keys_per_bucket = 1; construction_time = construction_time - construction_time_begin; fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m); - #endif + #endif return mphf; } - + static 
int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue) { cmph_uint32 e; @@ -405,7 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que cmph_uint32 h0, h1, h2; cmph_uint32 keylen; char *key = NULL; - mph->key_source->read(mph->key_source->data, &key, &keylen); + mph->key_source->read(mph->key_source->data, &key, &keylen); hash_vector(bdz->hl, key, keylen,hl); h0 = hl[0] % bdz->r; h1 = hl[1] % bdz->r + bdz->r; @@ -414,7 +414,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que mph->key_source->dispose(mph->key_source->data, key, keylen); bdz_add_edge(graph3,h0,h1,h2); } - cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3); + cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3); return (cycles == 0); } @@ -426,7 +426,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t cmph_uint32 v0,v1,v2; cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1); cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0); - bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8)); + bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8)); memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1); memset(bdz->g, 0xff, (size_t)(sizeg)); @@ -439,12 +439,12 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t if(!GETBIT(marked_vertices, v0)){ if(!GETBIT(marked_vertices,v1)) { - SETVALUE1(bdz->g, v1, UNASSIGNED); + SETVALUE1(bdz->g, v1, UNASSIGNED); SETBIT(marked_vertices, v1); } if(!GETBIT(marked_vertices,v2)) { - SETVALUE1(bdz->g, v2, UNASSIGNED); + SETVALUE1(bdz->g, v2, UNASSIGNED); SETBIT(marked_vertices, v2); } SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3); @@ -507,7 +507,7 @@ int bdz_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->r), 
sizeof(cmph_uint32), (size_t)1, fd); - + cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0); nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd); @@ -541,12 +541,12 @@ void bdz_load(FILE *f, cmph_t *mphf) nbytes = fread(buf, (size_t)buflen, (size_t)1, f); bdz->hl = hash_state_load(buf, buflen); free(buf); - + DEBUGP("Reading m and n\n"); - nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f); - nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f); - nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f); sizeg = (cmph_uint32)ceil(bdz->n/4.0); bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8)); nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f); @@ -566,7 +566,7 @@ void bdz_load(FILE *f, cmph_t *mphf) #endif return; } - + static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex) { @@ -578,17 +578,17 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint while(beg_idx_b < end_idx_b) { base_rank += bdz_lookup_table[*(g + beg_idx_b++)]; - + } DEBUGP("base rank %u\n", base_rank); beg_idx_v = beg_idx_b << 2; DEBUGP("beg_idx_v %u\n", beg_idx_v); - while(beg_idx_v < vertex) + while(beg_idx_v < vertex) { if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++; beg_idx_v++; } - + return base_rank; } @@ -610,7 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) void bdz_destroy(cmph_t *mphf) { bdz_data_t *data = (bdz_data_t *)mphf->data; - free(data->g); + free(data->g); hash_state_destroy(data->hl); free(data->ranktable); free(data); @@ -660,18 +660,18 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf) * \brief Return the amount of space needed to pack mphf. 
* \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 bdz_packed_size(cmph_t *mphf) { bdz_data_t *data = (bdz_data_t *)mphf->data; - CMPH_HASH hl_type = hash_get_type(data->hl); + CMPH_HASH hl_type = hash_get_type(data->hl); return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0))); } /** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. * \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -679,13 +679,13 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf) */ cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - + register cmph_uint32 vertex; register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type)); - + register cmph_uint32 r = *ranktable++; register cmph_uint32 ranktablesize = *ranktable++; register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize); diff --git a/src/bdz_ph.c b/src/bdz_ph.c index 16257c0..ad52d78 100755 --- a/src/bdz_ph.c +++ b/src/bdz_ph.c @@ -24,7 +24,7 @@ static cmph_uint8 lookup_table[5][256] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, }; -typedef struct +typedef struct { cmph_uint32 vertices[3]; cmph_uint32 next_edges[3]; @@ -41,12 +41,12 @@ static void bdz_ph_free_queue(bdz_ph_queue_t * queue) free(*queue); }; -typedef struct +typedef struct { cmph_uint32 nedges; bdz_ph_edge_t * edges; cmph_uint32 * first_edge; - cmph_uint8 * vert_degree; + cmph_uint8 * vert_degree; }bdz_ph_graph3_t; @@ -54,7 +54,7 @@ static void bdz_ph_alloc_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cm { graph3->edges=malloc(nedges*sizeof(bdz_ph_edge_t)); graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32)); - graph3->vert_degree=malloc((size_t)nvertices); + graph3->vert_degree=malloc((size_t)nvertices); }; static void bdz_ph_init_graph3(bdz_ph_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices) { @@ -101,10 +101,10 @@ static void bdz_ph_dump_graph(bdz_ph_graph3_t* graph3, cmph_uint32 nedges, cmph_ printf(" nexts %d %d %d",graph3->edges[i].next_edges[0], graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]); }; - + for(i=0;ifirst_edge[i]); - + }; }; @@ -121,7 +121,7 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge) j=0; } else if(graph3->edges[edge1].vertices[1]==vert){ j=1; - } else + } else j=2; edge1=graph3->edges[edge1].next_edges[j]; }; @@ -130,16 +130,16 @@ static void bdz_ph_remove_edge(bdz_ph_graph3_t * graph3, cmph_uint32 curr_edge) bdz_ph_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4); exit(-1); }; - + if(edge2!=NULL_EDGE){ - graph3->edges[edge2].next_edges[j] = + graph3->edges[edge2].next_edges[j] = graph3->edges[edge1].next_edges[i]; - } else + } else graph3->first_edge[vert]= graph3->edges[edge1].next_edges[i]; 
graph3->vert_degree[vert]--; }; - + }; static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ph_queue_t queue, bdz_ph_graph3_t* graph3) @@ -176,7 +176,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ queue[queue_head++]=tmp_edge; SETBIT(marked_edge,tmp_edge); }; - + }; if(graph3->vert_degree[v1]==1) { tmp_edge=graph3->first_edge[v1]; @@ -184,7 +184,7 @@ static int bdz_ph_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_ queue[queue_head++]=tmp_edge; SETBIT(marked_edge,tmp_edge); }; - + }; if(graph3->vert_degree[v2]==1){ tmp_edge=graph3->first_edge[v2]; @@ -229,7 +229,7 @@ void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 1) break; //bdz_ph only uses one linear hash function - bdz_ph->hashfunc = *hashptr; + bdz_ph->hashfunc = *hashptr; ++i, ++hashptr; } } @@ -251,16 +251,16 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c) if (c == 0) c = 1.23; // validating restrictions over parameter c. 
DEBUGP("c: %f\n", c); - bdz_ph->m = mph->key_source->nkeys; - bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3); + bdz_ph->m = mph->key_source->nkeys; + bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3); if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1; bdz_ph->n = 3*bdz_ph->r; - + bdz_ph_alloc_graph3(&graph3, bdz_ph->m, bdz_ph->n); bdz_ph_alloc_queue(&edges,bdz_ph->m); DEBUGP("Created hypergraph\n"); - + DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz_ph->m, bdz_ph->n, bdz_ph->r, c); // Mapping step @@ -287,10 +287,10 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c) fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } + } else break; } - + if (iterations == 0) { // free(bdz_ph->g); @@ -308,7 +308,7 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c) bdz_ph_free_queue(&edges); bdz_ph_free_graph3(&graph3); - + if (mph->verbosity) { fprintf(stderr, "Starting optimization step\n"); @@ -338,23 +338,23 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c) fprintf(stderr, "Successfully generated minimal perfect hash function\n"); } - #ifdef CMPH_TIMING + #ifdef CMPH_TIMING register cmph_uint32 space_usage = bdz_ph_packed_size(mphf)*8; register cmph_uint32 keys_per_bucket = 1; construction_time = construction_time - construction_time_begin; fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz_ph->m, bdz_ph->m/(double)bdz_ph->n, keys_per_bucket, construction_time, space_usage/(double)bdz_ph->m); - #endif + #endif return mphf; } - + static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_queue_t queue) { cmph_uint32 e; int cycles = 0; cmph_uint32 hl[3]; - + bdz_ph_config_data_t *bdz_ph = (bdz_ph_config_data_t *)mph->data; bdz_ph_init_graph3(graph3, bdz_ph->m, bdz_ph->n); mph->key_source->rewind(mph->key_source->data); @@ -363,7 +363,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu cmph_uint32 h0, h1, h2; cmph_uint32 
keylen; char *key = NULL; - mph->key_source->read(mph->key_source->data, &key, &keylen); + mph->key_source->read(mph->key_source->data, &key, &keylen); hash_vector(bdz_ph->hl, key, keylen, hl); h0 = hl[0] % bdz_ph->r; h1 = hl[1] % bdz_ph->r + bdz_ph->r; @@ -371,7 +371,7 @@ static int bdz_ph_mapping(cmph_config_t *mph, bdz_ph_graph3_t* graph3, bdz_ph_qu mph->key_source->dispose(mph->key_source->data, key, keylen); bdz_ph_add_edge(graph3,h0,h1,h2); } - cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3); + cycles = bdz_ph_generate_queue(bdz_ph->m, bdz_ph->n, queue, graph3); return (cycles == 0); } @@ -383,7 +383,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz cmph_uint32 v0,v1,v2; cmph_uint8 * marked_vertices =malloc((size_t)(bdz_ph->n >> 3) + 1); cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/4.0); - bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); + bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); memset(marked_vertices, 0, (size_t)(bdz_ph->n >> 3) + 1); //memset(bdz_ph->g, 0xff, sizeg); @@ -396,14 +396,14 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz if(!GETBIT(marked_vertices, v0)){ if(!GETBIT(marked_vertices,v1)) { - //SETVALUE(bdz_ph->g, v1, UNASSIGNED); + //SETVALUE(bdz_ph->g, v1, UNASSIGNED); SETBIT(marked_vertices, v1); } if(!GETBIT(marked_vertices,v2)) { - //SETVALUE(bdz_ph->g, v2, UNASSIGNED); + //SETVALUE(bdz_ph->g, v2, UNASSIGNED); SETBIT(marked_vertices, v2); - } + } SETVALUE0(bdz_ph->g, v0, (6-(GETVALUE(bdz_ph->g, v1) + GETVALUE(bdz_ph->g,v2)))%3); SETBIT(marked_vertices, v0); } else if(!GETBIT(marked_vertices, v1)) { @@ -417,7 +417,7 @@ static void assigning(bdz_ph_config_data_t *bdz_ph, bdz_ph_graph3_t* graph3, bdz }else { SETVALUE0(bdz_ph->g, v2, (8-(GETVALUE(bdz_ph->g,v0)+GETVALUE(bdz_ph->g, v1)))%3); SETBIT(marked_vertices, v2); - } + } DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz_ph->g, v0), 
GETVALUE(bdz_ph->g, v1), GETVALUE(bdz_ph->g, v2)); }; free(marked_vertices); @@ -428,11 +428,11 @@ static void bdz_ph_optimization(bdz_ph_config_data_t *bdz_ph) cmph_uint32 i; cmph_uint8 byte = 0; cmph_uint32 sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0); - cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); + cmph_uint8 * new_g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); cmph_uint8 value; cmph_uint32 idx; - for(i = 0; i < bdz_ph->n; i++) - { + for(i = 0; i < bdz_ph->n; i++) + { idx = i/5; byte = new_g[idx]; value = GETVALUE(bdz_ph->g, i); @@ -462,7 +462,7 @@ int bdz_ph_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd); - sizeg = (cmph_uint32)ceil(data->n/5.0); + sizeg = (cmph_uint32)ceil(data->n/5.0); nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd); #ifdef DEBUG @@ -491,19 +491,19 @@ void bdz_ph_load(FILE *f, cmph_t *mphf) nbytes = fread(buf, (size_t)buflen, (size_t)1, f); bdz_ph->hl = hash_state_load(buf, buflen); free(buf); - + DEBUGP("Reading m and n\n"); - nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f); - nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f); - nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bdz_ph->n), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bdz_ph->m), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bdz_ph->r), sizeof(cmph_uint32), (size_t)1, f); sizeg = (cmph_uint32)ceil(bdz_ph->n/5.0); bdz_ph->g = (cmph_uint8 *)calloc((size_t)sizeg, sizeof(cmph_uint8)); nbytes = fread(bdz_ph->g, sizeg*sizeof(cmph_uint8), (size_t)1, f); return; } - + cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) { @@ -520,12 +520,12 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) byte0 = 
bdz_ph->g[hl[0]/5]; byte1 = bdz_ph->g[hl[1]/5]; byte2 = bdz_ph->g[hl[2]/5]; - + byte0 = lookup_table[hl[0]%5U][byte0]; byte1 = lookup_table[hl[1]%5U][byte1]; byte2 = lookup_table[hl[2]%5U][byte2]; vertex = hl[(byte0 + byte1 + byte2)%3]; - + return vertex; } @@ -533,7 +533,7 @@ cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) void bdz_ph_destroy(cmph_t *mphf) { bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data; - free(data->g); + free(data->g); hash_state_destroy(data->hl); free(data); free(mphf); @@ -571,17 +571,17 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf) * \brief Return the amount of space needed to pack mphf. * \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf) { bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data; - CMPH_HASH hl_type = hash_get_type(data->hl); + CMPH_HASH hl_type = hash_get_type(data->hl); cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/5.0); return (cmph_uint32) (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*sizeg); } /** cmph_uint32 bdz_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. 
* \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -589,21 +589,21 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf) */ cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - + register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; - + register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type); register cmph_uint32 r = *((cmph_uint32*) ptr); register cmph_uint8 * g = ptr + 4; - + cmph_uint32 hl[3]; register cmph_uint8 byte0, byte1, byte2; register cmph_uint32 vertex; hash_vector_packed(hl_ptr, hl_type, key, keylen, hl); - + hl[0] = hl[0] % r; hl[1] = hl[1] % r + r; hl[2] = hl[2] % r + (r << 1); @@ -611,11 +611,11 @@ cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 byte0 = g[hl[0]/5]; byte1 = g[hl[1]/5]; byte2 = g[hl[2]/5]; - + byte0 = lookup_table[hl[0]%5][byte0]; byte1 = lookup_table[hl[1]%5][byte1]; byte2 = lookup_table[hl[2]%5][byte2]; vertex = hl[(byte0 + byte1 + byte2)%3]; - + return vertex; } diff --git a/src/bm_numbers.c b/src/bm_numbers.c index cd3aa74..4ede2d7 100644 --- a/src/bm_numbers.c +++ b/src/bm_numbers.c @@ -128,4 +128,3 @@ int main(int argc, char** argv) { lsmap_destroy(g_created_mphf); return 0; } - diff --git a/src/bmz.c b/src/bmz.c index 51c7785..eb3190e 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -24,7 +24,7 @@ bmz_config_data_t *bmz_config_new(void) { bmz_config_data_t *bmz = NULL; bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t)); - assert(bmz); + if (!bmz) return NULL; memset(bmz, 0, sizeof(bmz_config_data_t)); bmz->hashfuncs[0] = CMPH_HASH_JENKINS; bmz->hashfuncs[1] = CMPH_HASH_JENKINS; @@ -49,7 +49,7 @@ void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 2) break; //bmz only uses two hash functions - bmz->hashfuncs[i] = *hashptr; + bmz->hashfuncs[i] = 
*hashptr; ++i, ++hashptr; } } @@ -68,8 +68,8 @@ cmph_t *bmz_new(cmph_config_t *mph, double c) bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data; if (c == 0) c = 1.15; // validating restrictions over parameter c. DEBUGP("c: %f\n", c); - bmz->m = mph->key_source->nkeys; - bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys); + bmz->m = mph->key_source->nkeys; + bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c); bmz->graph = graph_new(bmz->n, bmz->m); DEBUGP("Created graph\n"); @@ -81,7 +81,7 @@ cmph_t *bmz_new(cmph_config_t *mph, double c) { // Mapping step cmph_uint32 biggest_g_value = 0; - cmph_uint32 biggest_edge_value = 1; + cmph_uint32 biggest_edge_value = 1; iterations = 100; if (mph->verbosity) { @@ -109,12 +109,12 @@ cmph_t *bmz_new(cmph_config_t *mph, double c) fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } + } else break; } if (iterations == 0) { - graph_destroy(bmz->graph); + graph_destroy(bmz->graph); return NULL; } // Ordering step @@ -155,17 +155,17 @@ cmph_t *bmz_new(cmph_config_t *mph, double c) } bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes } - else + else { iterations_map--; if (mph->verbosity) fprintf(stderr, "Restarting mapping step. 
%u iterations remaining.\n", iterations_map); - } + } free(used_edges); free(visited); } while(restart_mapping && iterations_map > 0); graph_destroy(bmz->graph); bmz->graph = NULL; - if (iterations_map == 0) + if (iterations_map == 0) { return NULL; } @@ -212,15 +212,15 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3 while(!vqueue_is_empty(q)) { v = vqueue_remove(q); - it = graph_neighbors_it(bmz->graph, v); + it = graph_neighbors_it(bmz->graph, v); while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) - { + { if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u))) { collision = 1; while(collision) // lookahead to resolve collisions { - next_g = *biggest_g_value + 1; + next_g = *biggest_g_value + 1; it1 = graph_neighbors_it(bmz->graph, u); collision = 0; while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) @@ -232,7 +232,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3 vqueue_destroy(q); return 1; // restart mapping step. } - if (GETBIT(used_edges, (next_g + bmz->g[lav]))) + if (GETBIT(used_edges, (next_g + bmz->g[lav]))) { collision = 1; break; @@ -240,7 +240,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3 } } if (next_g > *biggest_g_value) *biggest_g_value = next_g; - } + } // Marking used edges... it1 = graph_neighbors_it(bmz->graph, u); while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) @@ -254,9 +254,9 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3 bmz->g[u] = next_g; // Labelling vertex u. 
SETBIT(visited,u); vqueue_insert(q, u); - } + } } - + } vqueue_destroy(q); return 0; @@ -282,22 +282,22 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, while(!vqueue_is_empty(q)) { v = vqueue_remove(q); - it = graph_neighbors_it(bmz->graph, v); + it = graph_neighbors_it(bmz->graph, v); while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) - { + { if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u))) { cmph_uint32 next_g_index = 0; collision = 1; while(collision) // lookahead to resolve collisions { - if (next_g_index < nunused_g_values) + if (next_g_index < nunused_g_values) { - next_g = unused_g_values[next_g_index++]; + next_g = unused_g_values[next_g_index++]; } - else + else { - next_g = *biggest_g_value + 1; + next_g = *biggest_g_value + 1; next_g_index = UINT_MAX; } it1 = graph_neighbors_it(bmz->graph, u); @@ -312,7 +312,7 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, free(unused_g_values); return 1; // restart mapping step. } - if (GETBIT(used_edges, (next_g + bmz->g[lav]))) + if (GETBIT(used_edges, (next_g + bmz->g[lav]))) { collision = 1; break; @@ -324,13 +324,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, if(nunused_g_values == unused_g_values_capacity) { unused_g_values = (cmph_uint32 *)realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32)); - unused_g_values_capacity += BUFSIZ; - } - unused_g_values[nunused_g_values++] = next_g; + unused_g_values_capacity += BUFSIZ; + } + unused_g_values[nunused_g_values++] = next_g; } if (next_g > *biggest_g_value) *biggest_g_value = next_g; - } + } next_g_index--; if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values]; @@ -347,13 +347,13 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, bmz->g[u] = next_g; // Labelling vertex u. 
SETBIT(visited, u); vqueue_insert(q, u); - } + } } - + } vqueue_destroy(q); free(unused_g_values); - return 0; + return 0; } static cmph_uint32 next_unused_edge(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index) @@ -381,8 +381,8 @@ static void bmz_traverse(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_u SETBIT(visited, neighbor); (*unused_edge_index)++; bmz_traverse(bmz, used_edges, neighbor, unused_edge_index, visited); - - } + + } } static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited) @@ -394,7 +394,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * { v1 = graph_vertex_id(bmz->graph, i, 0); v2 = graph_vertex_id(bmz->graph, i, 1); - if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; + if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited); else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited); @@ -403,7 +403,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * for(i = 0; i < bmz->n; i++) { if(!GETBIT(visited,i)) - { + { bmz->g[i] = 0; SETBIT(visited, i); bmz_traverse(bmz, used_edges, i, &unused_edge_index, visited); @@ -411,14 +411,14 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * } } - + static int bmz_gen_edges(cmph_config_t *mph) { cmph_uint32 e; bmz_config_data_t *bmz = (bmz_config_data_t *)mph->data; cmph_uint8 multiple_edges = 0; DEBUGP("Generating edges for %u vertices\n", bmz->n); - graph_clear_edges(bmz->graph); + graph_clear_edges(bmz->graph); mph->key_source->rewind(mph->key_source->data); for (e = 0; e < mph->key_source->nkeys; ++e) { @@ -426,12 +426,12 @@ static int bmz_gen_edges(cmph_config_t *mph) cmph_uint32 keylen; char *key = NULL; 
mph->key_source->read(mph->key_source->data, &key, &keylen); - + h1 = hash(bmz->hashes[0], key, keylen) % bmz->n; h2 = hash(bmz->hashes[1], key, keylen) % bmz->n; if (h1 == h2) if (++h2 >= bmz->n) h2 = 0; DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2); - if (h1 == h2) + if (h1 == h2) { if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); mph->key_source->dispose(mph->key_source->data, key, keylen); @@ -472,7 +472,7 @@ int bmz_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); - + nbytes = fwrite(data->g, sizeof(cmph_uint32)*(data->n), (size_t)1, fd); #ifdef DEBUG cmph_uint32 i; @@ -510,8 +510,8 @@ void bmz_load(FILE *f, cmph_t *mphf) } DEBUGP("Reading m and n\n"); - nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f); - nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bmz->n), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(bmz->m), sizeof(cmph_uint32), (size_t)1, f); bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n); nbytes = fread(bmz->g, bmz->n*sizeof(cmph_uint32), (size_t)1, f); @@ -522,7 +522,7 @@ void bmz_load(FILE *f, cmph_t *mphf) #endif return; } - + cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) { @@ -537,7 +537,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) void bmz_destroy(cmph_t *mphf) { bmz_data_t *data = (bmz_data_t *)mphf->data; - free(data->g); + free(data->g); hash_state_destroy(data->hashes[0]); hash_state_destroy(data->hashes[1]); free(data->hashes); @@ -548,7 +548,7 @@ void bmz_destroy(cmph_t *mphf) /** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 
* \param mphf pointer to the resulting mphf - * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() + * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() */ void bmz_pack(cmph_t *mphf, void *packed_mphf) { @@ -579,26 +579,26 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf) ptr += sizeof(data->n); // packing g - memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); + memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); } /** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf); * \brief Return the amount of space needed to pack mphf. * \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 bmz_packed_size(cmph_t *mphf) { bmz_data_t *data = (bmz_data_t *)mphf->data; - CMPH_HASH h1_type = hash_get_type(data->hashes[0]); - CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); - return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); } /** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. 
* \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -613,13 +613,13 @@ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); h2_ptr += 4; - + register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); - - register cmph_uint32 n = *g_ptr++; - - register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; - register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; + + register cmph_uint32 n = *g_ptr++; + + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; if (h1 == h2 && ++h2 > n) h2 = 0; - return (g_ptr[h1] + g_ptr[h2]); + return (g_ptr[h1] + g_ptr[h2]); } diff --git a/src/bmz8.c b/src/bmz8.c index 4db4dfc..54ba606 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -23,7 +23,7 @@ bmz8_config_data_t *bmz8_config_new(void) { bmz8_config_data_t *bmz8; bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t)); - assert(bmz8); + if (!bmz8) return NULL; memset(bmz8, 0, sizeof(bmz8_config_data_t)); bmz8->hashfuncs[0] = CMPH_HASH_JENKINS; bmz8->hashfuncs[1] = CMPH_HASH_JENKINS; @@ -48,7 +48,7 @@ void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 2) break; //bmz8 only uses two hash functions - bmz8->hashfuncs[i] = *hashptr; + bmz8->hashfuncs[i] = *hashptr; ++i, ++hashptr; } } @@ -64,7 +64,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c) cmph_uint8 restart_mapping = 0; cmph_uint8 * visited = NULL; bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data; - + if (mph->key_source->nkeys >= 256) { if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n"); @@ -72,8 +72,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, 
double c) } if (c == 0) c = 1.15; // validating restrictions over parameter c. DEBUGP("c: %f\n", c); - bmz8->m = (cmph_uint8) mph->key_source->nkeys; - bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys); + bmz8->m = (cmph_uint8) mph->key_source->nkeys; + bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c); bmz8->graph = graph_new(bmz8->n, bmz8->m); DEBUGP("Created graph\n"); @@ -113,8 +113,8 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c) fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } - else break; + } + else break; } if (iterations == 0) { @@ -161,19 +161,19 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c) } bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes } - else + else { iterations_map--; if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map); - } + } free(used_edges); free(visited); }while(restart_mapping && iterations_map > 0); - graph_destroy(bmz8->graph); + graph_destroy(bmz8->graph); bmz8->graph = NULL; - if (iterations_map == 0) + if (iterations_map == 0) { return NULL; } @@ -213,15 +213,15 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui while(!vqueue_is_empty(q)) { v = vqueue_remove(q); - it = graph_neighbors_it(bmz8->graph, v); + it = graph_neighbors_it(bmz8->graph, v); while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR) - { + { if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u))) { collision = 1; while(collision) // lookahead to resolve collisions { - next_g = (cmph_uint8)(*biggest_g_value + 1); + next_g = (cmph_uint8)(*biggest_g_value + 1); it1 = graph_neighbors_it(bmz8->graph, u); collision = 0; while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR) @@ -233,7 +233,7 @@ static cmph_uint8 
bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui vqueue_destroy(q); return 1; // restart mapping step. } - if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) + if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) { collision = 1; break; @@ -241,7 +241,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui } } if (next_g > *biggest_g_value) *biggest_g_value = next_g; - } + } // Marking used edges... it1 = graph_neighbors_it(bmz8->graph, u); while((lav = graph_next_neighbor(bmz8->graph, &it1)) != GRAPH_NO_NEIGHBOR) @@ -250,16 +250,16 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui { SETBIT(used_edges,(next_g + bmz8->g[lav])); - if(next_g + bmz8->g[lav] > *biggest_edge_value) + if(next_g + bmz8->g[lav] > *biggest_edge_value) *biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]); } } bmz8->g[u] = next_g; // Labelling vertex u. SETBIT(visited,u); vqueue_insert(q, u); - } + } } - + } vqueue_destroy(q); return 0; @@ -268,8 +268,8 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz8, cmph_uint32 v, cmph_uint8 * biggest_g_value, cmph_uint8 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited) { cmph_uint8 next_g; - cmph_uint32 u; - cmph_uint32 lav; + cmph_uint32 u; + cmph_uint32 lav; cmph_uint8 collision; cmph_uint8 * unused_g_values = NULL; cmph_uint8 unused_g_values_capacity = 0; @@ -280,27 +280,27 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz DEBUGP("Labelling critical vertices\n"); bmz8->g[v] = (cmph_uint8)(ceil ((double)(*biggest_edge_value)/2) - 1); SETBIT(visited, v); - next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2)); + next_g = (cmph_uint8)floor((double)(*biggest_edge_value/2)); vqueue_insert(q, v); while(!vqueue_is_empty(q)) { v = vqueue_remove(q); - it = graph_neighbors_it(bmz8->graph, v); + it = 
graph_neighbors_it(bmz8->graph, v); while ((u = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR) - { + { if (graph_node_is_critical(bmz8->graph, u) && (!GETBIT(visited,u))) { cmph_uint8 next_g_index = 0; collision = 1; while(collision) // lookahead to resolve collisions { - if (next_g_index < nunused_g_values) + if (next_g_index < nunused_g_values) { next_g = unused_g_values[next_g_index++]; } - else + else { - next_g = (cmph_uint8)(*biggest_g_value + 1); + next_g = (cmph_uint8)(*biggest_g_value + 1); next_g_index = 255;//UINT_MAX; } it1 = graph_neighbors_it(bmz8->graph, u); @@ -315,7 +315,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz free(unused_g_values); return 1; // restart mapping step. } - if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) + if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) { collision = 1; break; @@ -327,14 +327,14 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz if(nunused_g_values == unused_g_values_capacity) { unused_g_values = (cmph_uint8*)realloc(unused_g_values, ((size_t)(unused_g_values_capacity + BUFSIZ))*sizeof(cmph_uint8)); - unused_g_values_capacity += (cmph_uint8)BUFSIZ; - } - unused_g_values[nunused_g_values++] = next_g; + unused_g_values_capacity += (cmph_uint8)BUFSIZ; + } + unused_g_values[nunused_g_values++] = next_g; } if (next_g > *biggest_g_value) *biggest_g_value = next_g; } - + next_g_index--; if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values]; @@ -345,22 +345,22 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav)) { SETBIT(used_edges,(next_g + bmz8->g[lav])); - if(next_g + bmz8->g[lav] > *biggest_edge_value) + if(next_g + bmz8->g[lav] > *biggest_edge_value) *biggest_edge_value = (cmph_uint8)(next_g + bmz8->g[lav]); } } - + bmz8->g[u] = next_g; // Labelling vertex u. 
SETBIT(visited, u); vqueue_insert(q, u); - - } + + } } - + } vqueue_destroy(q); free(unused_g_values); - return 0; + return 0; } static cmph_uint8 next_unused_edge(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index) @@ -388,8 +388,8 @@ static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmp SETBIT(visited, neighbor); (*unused_edge_index)++; bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited); - - } + + } } static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmph_uint8 * visited) @@ -401,7 +401,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint { v1 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 0); v2 = (cmph_uint8)graph_vertex_id(bmz8->graph, i, 1); - if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; + if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; if(GETBIT(visited,v1)) bmz8_traverse(bmz8, used_edges, v1, &unused_edge_index, visited); else bmz8_traverse(bmz8, used_edges, v2, &unused_edge_index, visited); @@ -410,7 +410,7 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint for(i = 0; i < bmz8->n; i++) { if(!GETBIT(visited,i)) - { + { bmz8->g[i] = 0; SETBIT(visited, i); bmz8_traverse(bmz8, used_edges, i, &unused_edge_index, visited); @@ -418,14 +418,14 @@ static void bmz8_traverse_non_critical_nodes(bmz8_config_data_t *bmz8, cmph_uint } } - + static int bmz8_gen_edges(cmph_config_t *mph) { cmph_uint8 e; bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data; cmph_uint8 multiple_edges = 0; DEBUGP("Generating edges for %u vertices\n", bmz8->n); - graph_clear_edges(bmz8->graph); + graph_clear_edges(bmz8->graph); mph->key_source->rewind(mph->key_source->data); for (e = 0; e < mph->key_source->nkeys; ++e) { @@ -433,12 +433,12 @@ static int bmz8_gen_edges(cmph_config_t *mph) 
cmph_uint32 keylen; char *key = NULL; mph->key_source->read(mph->key_source->data, &key, &keylen); - + // if (key == NULL)fprintf(stderr, "key = %s -- read BMZ\n", key); h1 = (cmph_uint8)(hash(bmz8->hashes[0], key, keylen) % bmz8->n); h2 = (cmph_uint8)(hash(bmz8->hashes[1], key, keylen) % bmz8->n); if (h1 == h2) if (++h2 >= bmz8->n) h2 = 0; - if (h1 == h2) + if (h1 == h2) { if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); mph->key_source->dispose(mph->key_source->data, key, keylen); @@ -480,7 +480,7 @@ int bmz8_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(&(data->n), sizeof(cmph_uint8), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint8), (size_t)1, fd); - + nbytes = fwrite(data->g, sizeof(cmph_uint8)*(data->n), (size_t)1, fd); /* #ifdef DEBUG fprintf(stderr, "G: "); @@ -518,8 +518,8 @@ void bmz8_load(FILE *f, cmph_t *mphf) } DEBUGP("Reading m and n\n"); - nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f); - nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f); + nbytes = fread(&(bmz8->n), sizeof(cmph_uint8), (size_t)1, f); + nbytes = fread(&(bmz8->m), sizeof(cmph_uint8), (size_t)1, f); bmz8->g = (cmph_uint8 *)malloc(sizeof(cmph_uint8)*bmz8->n); nbytes = fread(bmz8->g, bmz8->n*sizeof(cmph_uint8), (size_t)1, f); @@ -530,7 +530,7 @@ void bmz8_load(FILE *f, cmph_t *mphf) #endif return; } - + cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) { @@ -556,7 +556,7 @@ void bmz8_destroy(cmph_t *mphf) /** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf - * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() + * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. 
The size of packed_mphf must be at least cmph_packed_size() */ void bmz8_pack(cmph_t *mphf, void *packed_mphf) { @@ -585,26 +585,26 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf) *ptr++ = data->n; // packing g - memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n); + memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n); } /** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf); * \brief Return the amount of space needed to pack mphf. * \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 bmz8_packed_size(cmph_t *mphf) { bmz8_data_t *data = (bmz8_data_t *)mphf->data; - CMPH_HASH h1_type = hash_get_type(data->hashes[0]); - CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); - return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n); } /** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. 
* \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -619,14 +619,14 @@ cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); h2_ptr += 4; - + register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type); - - register cmph_uint8 n = *g_ptr++; - - register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n); - register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n); + + register cmph_uint8 n = *g_ptr++; + + register cmph_uint8 h1 = (cmph_uint8)(hash_packed(h1_ptr, h1_type, key, keylen) % n); + register cmph_uint8 h2 = (cmph_uint8)(hash_packed(h2_ptr, h2_type, key, keylen) % n); DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); if (h1 == h2 && ++h2 > n) h2 = 0; - return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]); + return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]); } diff --git a/src/brz.c b/src/brz.c index f9c48ef..bac5bc5 100755 --- a/src/brz.c +++ b/src/brz.c @@ -26,8 +26,9 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen); brz_config_data_t *brz_config_new(void) { - brz_config_data_t *brz = NULL; + brz_config_data_t *brz = NULL; brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t)); + if (!brz) return NULL; brz->algo = CMPH_FCH; brz->b = 128; brz->hashfuncs[0] = CMPH_HASH_JENKINS; @@ -42,7 +43,7 @@ brz_config_data_t *brz_config_new(void) brz->memory_availability = 1024*1024; brz->tmp_dir = (cmph_uint8 *)calloc((size_t)10, sizeof(cmph_uint8)); brz->mphf_fd = NULL; - strcpy((char *)(brz->tmp_dir), "/var/tmp/"); + strcpy((char *)(brz->tmp_dir), "/var/tmp/"); assert(brz); return brz; } @@ -63,7 +64,7 @@ void brz_config_set_hashfuncs(cmph_config_t 
*mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 3) break; //brz only uses three hash functions - brz->hashfuncs[i] = *hashptr; + brz->hashfuncs[i] = *hashptr; ++i, ++hashptr; } } @@ -84,14 +85,14 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) if(tmp_dir[len-1] != '/') { brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+2, sizeof(cmph_uint8)); - sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir); + sprintf((char *)(brz->tmp_dir), "%s/", (char *)tmp_dir); } else { brz->tmp_dir = (cmph_uint8 *)calloc((size_t)len+1, sizeof(cmph_uint8)); - sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir); + sprintf((char *)(brz->tmp_dir), "%s", (char *)tmp_dir); } - + } } @@ -105,14 +106,14 @@ void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd) void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b) { brz_config_data_t *brz = (brz_config_data_t *)mph->data; - if(b <= 64 || b >= 175) + if(b <= 64 || b >= 175) { b = 128; } brz->b = (cmph_uint8)b; } -void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) +void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) { if (algo == CMPH_BMZ8 || algo == CMPH_FCH) // supported algorithms { @@ -147,13 +148,13 @@ cmph_t *brz_new(cmph_config_t *mph, double c) brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b)); DEBUGP("k: %u\n", brz->k); brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8)); - + // Clustering the keys by graph id. 
if (mph->verbosity) { - fprintf(stderr, "Partioning the set of keys.\n"); + fprintf(stderr, "Partioning the set of keys.\n"); } - + while(1) { int ok; @@ -172,17 +173,17 @@ cmph_t *brz_new(cmph_config_t *mph, double c) fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } - else break; + } + else break; } - if (iterations == 0) + if (iterations == 0) { DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n"); free(brz->size); return NULL; } DEBUGP("Graphs generated\n"); - + brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32)); for (i = 1; i < brz->k; ++i) { @@ -209,7 +210,7 @@ cmph_t *brz_new(cmph_config_t *mph, double c) brzf->m = brz->m; brzf->algo = brz->algo; mphf->data = brzf; - mphf->size = brz->m; + mphf->size = brz->m; DEBUGP("Successfully generated minimal perfect hash\n"); if (mph->verbosity) { @@ -240,7 +241,7 @@ static int brz_gen_mphf(cmph_config_t *mph) cmph_uint32 cur_bucket = 0; cmph_uint8 nkeys_vd = 0; cmph_uint8 ** keys_vd = NULL; - + mph->key_source->rewind(mph->key_source->data); DEBUGP("Generating graphs from %u keys\n", brz->m); // Partitioning @@ -249,7 +250,7 @@ static int brz_gen_mphf(cmph_config_t *mph) mph->key_source->read(mph->key_source->data, &key, &keylen); /* Buffers management */ - if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers + if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers { if(mph->verbosity) { @@ -265,8 +266,8 @@ static int brz_gen_mphf(cmph_config_t *mph) sum += value; value = buckets_size[i]; buckets_size[i] = sum; - - } + + } memory_usage = 0; keys_index = (cmph_uint32 *)calloc((size_t)nkeys_in_buffer, sizeof(cmph_uint32)); for(i = 0; i < nkeys_in_buffer; i++) @@ -298,8 +299,8 @@ static int brz_gen_mphf(cmph_config_t *mph) memcpy(buffer + memory_usage + sizeof(keylen), key, (size_t)keylen); memory_usage += keylen 
+ (cmph_uint32)sizeof(keylen); h0 = hash(brz->h0, key, keylen) % brz->k; - - if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))) + + if ((brz->size[h0] == MAX_BUCKET_SIZE) || (brz->algo == CMPH_BMZ8 && ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))) { free(buffer); free(buckets_size); @@ -310,8 +311,8 @@ static int brz_gen_mphf(cmph_config_t *mph) nkeys_in_buffer++; mph->key_source->dispose(mph->key_source->data, key, keylen); } - if (memory_usage != 0) // flush buffers - { + if (memory_usage != 0) // flush buffers + { if(mph->verbosity) { fprintf(stderr, "Flushing %u\n", nkeys_in_buffer); @@ -370,12 +371,12 @@ static int brz_gen_mphf(cmph_config_t *mph) nbytes = fwrite(&(brz->algo), sizeof(brz->algo), (size_t)1, brz->mphf_fd); nbytes = fwrite(&(brz->k), sizeof(cmph_uint32), (size_t)1, brz->mphf_fd); // number of MPHFs nbytes = fwrite(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, brz->mphf_fd); - + //tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *)); buff_manager = buffer_manager_new(brz->memory_availability, nflushes); buffer_merge = (cmph_uint8 **)calloc((size_t)nflushes, sizeof(cmph_uint8 *)); buffer_h0 = (cmph_uint32 *)calloc((size_t)nflushes, sizeof(cmph_uint32)); - + memory_usage = 0; for(i = 0; i < nflushes; i++) { @@ -388,7 +389,7 @@ static int brz_gen_mphf(cmph_config_t *mph) h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k; buffer_h0[i] = h0; buffer_merge[i] = (cmph_uint8 *)key; - key = NULL; //transfer memory ownership + key = NULL; //transfer memory ownership } e = 0; keys_vd = (cmph_uint8 **)calloc((size_t)MAX_BUCKET_SIZE, sizeof(cmph_uint8 *)); @@ -429,7 +430,7 @@ static int brz_gen_mphf(cmph_config_t *mph) e++; buffer_h0[i] = UINT_MAX; } - + if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket. 
{ cmph_io_adapter_t *source = NULL; @@ -444,7 +445,7 @@ static int brz_gen_mphf(cmph_config_t *mph) //cmph_config_set_algo(config, CMPH_BMZ8); cmph_config_set_graphsize(config, brz->c); mphf_tmp = cmph_new(config); - if (mphf_tmp == NULL) + if (mphf_tmp == NULL) { if(mph->verbosity) fprintf(stderr, "ERROR: Can't generate MPHF for bucket %u out of %u\n", cur_bucket + 1, brz->k); error = 1; @@ -453,9 +454,9 @@ static int brz_gen_mphf(cmph_config_t *mph) cmph_io_byte_vector_adapter_destroy(source); break; } - if(mph->verbosity) + if(mph->verbosity) { - if (cur_bucket % 1000 == 0) + if (cur_bucket % 1000 == 0) { fprintf(stderr, "MPHF for bucket %u out of %u was generated.\n", cur_bucket + 1, brz->k); } @@ -465,7 +466,7 @@ static int brz_gen_mphf(cmph_config_t *mph) case CMPH_FCH: { fch_data_t * fchf = NULL; - fchf = (fch_data_t *)mphf_tmp->data; + fchf = (fch_data_t *)mphf_tmp->data; bufmphf = brz_copy_partial_fch_mphf(brz, fchf, cur_bucket, &buflenmphf); } break; @@ -516,7 +517,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch { cmph_uint32 i = 0; cmph_uint32 buflenh1 = 0; - cmph_uint32 buflenh2 = 0; + cmph_uint32 buflenh2 = 0; char * bufh1 = NULL; char * bufh2 = NULL; char * buf = NULL; @@ -528,7 +529,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch memcpy(buf, &buflenh1, sizeof(cmph_uint32)); memcpy(buf+sizeof(cmph_uint32), bufh1, (size_t)buflenh1); memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32)); - memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2); + memcpy(buf+2*sizeof(cmph_uint32)+buflenh1, bufh2, (size_t)buflenh2); for (i = 0; i < n; i++) memcpy(buf+2*sizeof(cmph_uint32)+buflenh1+buflenh2+i,(fchf->g + i), (size_t)1); free(bufh1); free(bufh2); @@ -537,7 +538,7 @@ static char * brz_copy_partial_fch_mphf(brz_config_data_t *brz, fch_data_t * fch static char * brz_copy_partial_bmz8_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, 
cmph_uint32 *buflen) { cmph_uint32 buflenh1 = 0; - cmph_uint32 buflenh2 = 0; + cmph_uint32 buflenh2 = 0; char * bufh1 = NULL; char * bufh2 = NULL; char * buf = NULL; @@ -572,7 +573,7 @@ int brz_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd); free(buf); // Dumping m and the vector offset. - nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); + nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(data->offset, sizeof(cmph_uint32)*(data->k), (size_t)1, fd); return 1; } @@ -591,7 +592,7 @@ void brz_load(FILE *f, cmph_t *mphf) nbytes = fread(&(brz->algo), sizeof(brz->algo), (size_t)1, f); // Reading algo. nbytes = fread(&(brz->k), sizeof(cmph_uint32), (size_t)1, f); brz->size = (cmph_uint8 *) malloc(sizeof(cmph_uint8)*brz->k); - nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f); + nbytes = fread(brz->size, sizeof(cmph_uint8)*(brz->k), (size_t)1, f); brz->h1 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k); brz->h2 = (hash_state_t **)malloc(sizeof(hash_state_t *)*brz->k); brz->g = (cmph_uint8 **) calloc((size_t)brz->k, sizeof(cmph_uint8 *)); @@ -635,7 +636,7 @@ void brz_load(FILE *f, cmph_t *mphf) brz->h0 = hash_state_load(buf, buflen); free(buf); - //loading c, m, and the vector offset. + //loading c, m, and the vector offset. 
nbytes = fread(&(brz->m), sizeof(cmph_uint32), (size_t)1, f); brz->offset = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*brz->k); nbytes = fread(brz->offset, sizeof(cmph_uint32)*(brz->k), (size_t)1, f); @@ -654,9 +655,9 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n; register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n; register cmph_uint8 mphf_bucket; - + if (h1 == h2 && ++h2 >= n) h2 = 0; - mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]); + mphf_bucket = (cmph_uint8)(brz->g[h0][h1] + brz->g[h0][h2]); DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0); DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, brz->g[h0][h1], brz->g[h0][h2], brz->offset[h0], brz->m); DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h0]); @@ -722,61 +723,61 @@ void brz_destroy(cmph_t *mphf) /** \fn void brz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf - * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() + * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. 
The size of packed_mphf must be at least cmph_packed_size() */ void brz_pack(cmph_t *mphf, void *packed_mphf) { brz_data_t *data = (brz_data_t *)mphf->data; cmph_uint8 * ptr = packed_mphf; cmph_uint32 i,n; - + // packing internal algo type memcpy(ptr, &(data->algo), sizeof(data->algo)); ptr += sizeof(data->algo); // packing h0 type - CMPH_HASH h0_type = hash_get_type(data->h0); + CMPH_HASH h0_type = hash_get_type(data->h0); memcpy(ptr, &h0_type, sizeof(h0_type)); ptr += sizeof(h0_type); // packing h0 hash_state_pack(data->h0, ptr); ptr += hash_state_packed_size(h0_type); - + // packing k memcpy(ptr, &(data->k), sizeof(data->k)); ptr += sizeof(data->k); // packing c - *((cmph_uint64 *)ptr) = (cmph_uint64)data->c; + *((cmph_uint64 *)ptr) = (cmph_uint64)data->c; ptr += sizeof(data->c); // packing h1 type - CMPH_HASH h1_type = hash_get_type(data->h1[0]); + CMPH_HASH h1_type = hash_get_type(data->h1[0]); memcpy(ptr, &h1_type, sizeof(h1_type)); ptr += sizeof(h1_type); // packing h2 type - CMPH_HASH h2_type = hash_get_type(data->h2[0]); + CMPH_HASH h2_type = hash_get_type(data->h2[0]); memcpy(ptr, &h2_type, sizeof(h2_type)); ptr += sizeof(h2_type); // packing size - memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k); + memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k); ptr += data->k; - + // packing offset - memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k); + memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k); ptr += sizeof(cmph_uint32)*data->k; - + #if defined (__ia64) || defined (__x86_64__) cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr; #else cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr; #endif - + cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k); - + for(i = 0; i < data->k; i++) { #if defined (__ia64) || defined (__x86_64__) @@ -787,7 +788,7 @@ void brz_pack(cmph_t *mphf, void *packed_mphf) // packing h1[i] hash_state_pack(data->h1[i], g_i); g_i += hash_state_packed_size(h1_type); - + // packing h2[i] hash_state_pack(data->h2[i], g_i); g_i += 
hash_state_packed_size(h2_type); @@ -803,9 +804,9 @@ void brz_pack(cmph_t *mphf, void *packed_mphf) break; default: assert(0); } - memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n); + memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n); g_i += n; - + } } @@ -814,16 +815,16 @@ void brz_pack(cmph_t *mphf, void *packed_mphf) * \brief Return the amount of space needed to pack mphf. * \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 brz_packed_size(cmph_t *mphf) { cmph_uint32 i; cmph_uint32 size = 0; brz_data_t *data = (brz_data_t *)mphf->data; - CMPH_HASH h0_type = hash_get_type(data->h0); - CMPH_HASH h1_type = hash_get_type(data->h1[0]); + CMPH_HASH h0_type = hash_get_type(data->h0); + CMPH_HASH h1_type = hash_get_type(data->h1[0]); CMPH_HASH h2_type = hash_get_type(data->h2[0]); - size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) + + size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) + sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k); // pointers to g_is #if defined (__ia64) || defined (__x86_64__) @@ -831,10 +832,10 @@ cmph_uint32 brz_packed_size(cmph_t *mphf) #else size += (cmph_uint32) sizeof(cmph_uint32)*data->k; #endif - + size += hash_state_packed_size(h1_type) * data->k; size += hash_state_packed_size(h2_type) * data->k; - + cmph_uint32 n = 0; for(i = 0; i < data->k; i++) { @@ -848,7 +849,7 @@ cmph_uint32 brz_packed_size(cmph_t *mphf) break; default: assert(0); } - size += n; + size += n; } return size; } @@ -859,28 +860,28 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char * { register CMPH_HASH h0_type = *packed_mphf++; register cmph_uint32 *h0_ptr = packed_mphf; - packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); - + packed_mphf = (cmph_uint32 *)(((cmph_uint8 
*)packed_mphf) + hash_state_packed_size(h0_type)); + register cmph_uint32 k = *packed_mphf++; register double c = (double)(*((cmph_uint64*)packed_mphf)); packed_mphf += 2; - register CMPH_HASH h1_type = *packed_mphf++; - - register CMPH_HASH h2_type = *packed_mphf++; + register CMPH_HASH h1_type = *packed_mphf++; + + register CMPH_HASH h2_type = *packed_mphf++; register cmph_uint8 * size = (cmph_uint8 *) packed_mphf; - packed_mphf = (cmph_uint32 *)(size + k); - + packed_mphf = (cmph_uint32 *)(size + k); + register cmph_uint32 * offset = packed_mphf; packed_mphf += k; register cmph_uint32 h0; - + hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint); h0 = fingerprint[2] % k; - + register cmph_uint32 m = size[h0]; register cmph_uint32 n = (cmph_uint32)ceil(c * m); @@ -889,69 +890,69 @@ static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char * #else register cmph_uint32 * g_is_ptr = packed_mphf; #endif - + register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0]; - + register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type); - + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; register cmph_uint8 mphf_bucket; - + if (h1 == h2 && ++h2 >= n) h2 = 0; - mphf_bucket = (cmph_uint8)(g[h1] + g[h2]); + mphf_bucket = (cmph_uint8)(g[h1] + g[h2]); DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0); DEBUGP("Address: %u\n", mphf_bucket + offset[h0]); - return (mphf_bucket + offset[h0]); + return (mphf_bucket + offset[h0]); } static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) { register CMPH_HASH h0_type = *packed_mphf++; - + register cmph_uint32 *h0_ptr = packed_mphf; - packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); - + packed_mphf = 
(cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); + register cmph_uint32 k = *packed_mphf++; register double c = (double)(*((cmph_uint64*)packed_mphf)); packed_mphf += 2; - register CMPH_HASH h1_type = *packed_mphf++; + register CMPH_HASH h1_type = *packed_mphf++; - register CMPH_HASH h2_type = *packed_mphf++; + register CMPH_HASH h2_type = *packed_mphf++; register cmph_uint8 * size = (cmph_uint8 *) packed_mphf; - packed_mphf = (cmph_uint32 *)(size + k); - + packed_mphf = (cmph_uint32 *)(size + k); + register cmph_uint32 * offset = packed_mphf; packed_mphf += k; - + register cmph_uint32 h0; - + hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint); h0 = fingerprint[2] % k; - + register cmph_uint32 m = size[h0]; register cmph_uint32 b = fch_calc_b(c, m); register double p1 = fch_calc_p1(m); register double p2 = fch_calc_p2(b); - + #if defined (__ia64) || defined (__x86_64__) register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf; #else register cmph_uint32 * g_is_ptr = packed_mphf; #endif - + register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0]; - + register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type); - + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m; @@ -962,7 +963,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k } /** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. 
* \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -970,7 +971,7 @@ static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *k */ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf; + register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf; register CMPH_ALGO algo = *ptr++; cmph_uint32 fingerprint[3]; switch(algo) @@ -982,4 +983,3 @@ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke default: assert(0); } } - diff --git a/src/buffer_entry.c b/src/buffer_entry.c index 5dcc4d5..65ebfda 100644 --- a/src/buffer_entry.c +++ b/src/buffer_entry.c @@ -17,7 +17,7 @@ struct __buffer_entry_t buffer_entry_t * buffer_entry_new(cmph_uint32 capacity) { buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t)); - assert(buff_entry); + if (!buff_entry) return NULL; buff_entry->fd = NULL; buff_entry->buff = NULL; buff_entry->capacity = capacity; @@ -62,7 +62,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * free(buf); return NULL; } - if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) + if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) { copied_bytes = buffer_entry->nbytes - buffer_entry->pos; lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes; @@ -71,7 +71,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * } memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes); buffer_entry->pos += lacked_bytes; - + lacked_bytes = *keylen; copied_bytes = 0; buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen)); @@ -83,7 +83,7 @@ cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes); } 
buffer_entry_load(buffer_entry); - } + } memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes); buffer_entry->pos += lacked_bytes; return buf; @@ -97,7 +97,7 @@ void buffer_entry_destroy(buffer_entry_t * buffer_entry) buffer_entry->buff = NULL; buffer_entry->capacity = 0; buffer_entry->nbytes = 0; - buffer_entry->pos = 0; + buffer_entry->pos = 0; buffer_entry->eof = 0; free(buffer_entry); } diff --git a/src/buffer_manage.c b/src/buffer_manage.c index fdefc62..93ec327 100644 --- a/src/buffer_manage.c +++ b/src/buffer_manage.c @@ -16,7 +16,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri { cmph_uint32 memory_avail_entry, i; buffer_manage_t *buff_manage = (buffer_manage_t *)malloc(sizeof(buffer_manage_t)); - assert(buff_manage); + if (!buff_manage) return NULL; buff_manage->memory_avail = memory_avail; buff_manage->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *)); buff_manage->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32)); @@ -26,7 +26,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri for(i = 0; i < buff_manage->nentries; i++) { buff_manage->buffer_entries[i] = buffer_entry_new(memory_avail_entry); - } + } return buff_manage; } @@ -54,7 +54,7 @@ cmph_uint8 * buffer_manage_read_key(buffer_manage_t * buffer_manage, cmph_uint32 } void buffer_manage_destroy(buffer_manage_t * buffer_manage) -{ +{ cmph_uint32 i; for(i = 0; i < buffer_manage->nentries; i++) { diff --git a/src/buffer_manager.c b/src/buffer_manager.c index 5a051e2..243d4d9 100644 --- a/src/buffer_manager.c +++ b/src/buffer_manager.c @@ -16,7 +16,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent { cmph_uint32 memory_avail_entry, i; buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t)); - assert(buff_manager); + if (!buff_manager) return NULL; 
buff_manager->memory_avail = memory_avail; buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *)); buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32)); @@ -26,7 +26,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent for(i = 0; i < buff_manager->nentries; i++) { buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry); - } + } return buff_manager; } @@ -52,7 +52,7 @@ cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uin } void buffer_manager_destroy(buffer_manager_t * buffer_manager) -{ +{ cmph_uint32 i; for(i = 0; i < buffer_manager->nentries; i++) { diff --git a/src/chd.c b/src/chd.c index 3eec2b3..6aafdbc 100644 --- a/src/chd.c +++ b/src/chd.c @@ -18,7 +18,7 @@ chd_config_data_t *chd_config_new(cmph_config_t *mph) cmph_io_adapter_t *key_source = mph->key_source; chd_config_data_t *chd; chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t)); - assert(chd); + if (!chd) return NULL; memset(chd, 0, sizeof(chd_config_data_t)); chd->chd_ph = cmph_config_new(key_source); @@ -69,12 +69,12 @@ cmph_t *chd_new(cmph_config_t *mph, double c) chd_config_data_t *chd = (chd_config_data_t *)mph->data; chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data; compressed_rank_t cr; - + register cmph_t * chd_phf = NULL; - register cmph_uint32 packed_chd_phf_size = 0; + register cmph_uint32 packed_chd_phf_size = 0; cmph_uint8 * packed_chd_phf = NULL; - - register cmph_uint32 packed_cr_size = 0; + + register cmph_uint32 packed_cr_size = 0; cmph_uint8 * packed_cr = NULL; register cmph_uint32 i, idx, nkeys, nvals, nbins; @@ -86,24 +86,24 @@ cmph_t *chd_new(cmph_config_t *mph, double c) ELAPSED_TIME_IN_SECONDS(&construction_time_begin); #endif - cmph_config_set_verbosity(chd->chd_ph, mph->verbosity); + cmph_config_set_verbosity(chd->chd_ph, mph->verbosity); cmph_config_set_graphsize(chd->chd_ph, c); - + 
if (mph->verbosity) { fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c); } - + chd_phf = cmph_new(chd->chd_ph); - - if(chd_phf == NULL) + + if(chd_phf == NULL) { return NULL; } - - packed_chd_phf_size = cmph_packed_size(chd_phf); + + packed_chd_phf_size = cmph_packed_size(chd_phf); DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size); - + /* Make sure that we have enough space to pack the mphf. */ packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1); @@ -111,8 +111,8 @@ cmph_t *chd_new(cmph_config_t *mph, double c) cmph_pack(chd_phf, packed_chd_phf); cmph_destroy(chd_phf); - - + + if (mph->verbosity) { fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n"); @@ -121,11 +121,11 @@ cmph_t *chd_new(cmph_config_t *mph, double c) compressed_rank_init(&cr); nbins = chd_ph->n; nkeys = chd_ph->m; - nvals = nbins - nkeys; - + nvals = nbins - nkeys; + vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32)); occup_table = (cmph_uint32 *)chd_ph->occup_table; - + for(i = 0, idx = 0; i < nbins; i++) { if(!GETBIT32(occup_table, i)) @@ -133,10 +133,10 @@ cmph_t *chd_new(cmph_config_t *mph, double c) vals_table[idx++] = i; } } - + compressed_rank_generate(&cr, vals_table, nvals); free(vals_table); - + packed_cr_size = compressed_rank_packed_size(&cr); packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8)); compressed_rank_pack(&cr, packed_cr); @@ -145,16 +145,16 @@ cmph_t *chd_new(cmph_config_t *mph, double c) mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf->algo = mph->algo; chdf = (chd_data_t *)malloc(sizeof(chd_data_t)); - + chdf->packed_cr = packed_cr; packed_cr = NULL; //transfer memory ownership chdf->packed_chd_phf = packed_chd_phf; packed_chd_phf = NULL; //transfer memory ownership - + chdf->packed_chd_phf_size = packed_chd_phf_size; chdf->packed_cr_size = packed_cr_size; - + mphf->data = chdf; mphf->size = nkeys; @@ -163,12 +163,12 @@ cmph_t 
*chd_new(cmph_config_t *mph, double c) { fprintf(stderr, "Successfully generated minimal perfect hash function\n"); } - #ifdef CMPH_TIMING + #ifdef CMPH_TIMING ELAPSED_TIME_IN_SECONDS(&construction_time); register cmph_uint32 space_usage = chd_packed_size(mphf)*8; construction_time = construction_time - construction_time_begin; fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys); - #endif + #endif return mphf; } @@ -196,7 +196,7 @@ int chd_dump(cmph_t *mphf, FILE *fd) { register size_t nbytes; chd_data_t *data = (chd_data_t *)mphf->data; - + __cmph_dump(mphf, fd); // Dumping CHD_PH perfect hash function @@ -207,7 +207,7 @@ int chd_dump(cmph_t *mphf, FILE *fd) DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", 1); nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd); - + return 1; } @@ -242,10 +242,10 @@ void chd_pack(cmph_t *mphf, void *packed_mphf) // packing packed_cr_size and packed_cr *ptr = data->packed_cr_size; ptr8 = (cmph_uint8 *) (ptr + 1); - + memcpy(ptr8, data->packed_cr, data->packed_cr_size); ptr8 += data->packed_cr_size; - + ptr = (cmph_uint32 *) ptr8; *ptr = data->packed_chd_phf_size; @@ -268,5 +268,3 @@ cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32); return _chd_search(packed_chd_phf, ptr, key, keylen); } - - diff --git a/src/chd_ph.c b/src/chd_ph.c index 71f83fb..d225156 100644 --- a/src/chd_ph.c +++ b/src/chd_ph.c @@ -29,7 +29,7 @@ struct _chd_ph_item_t }; typedef struct _chd_ph_item_t chd_ph_item_t; -// struct to represent the items at mapping phase only. +// struct to represent the items at mapping phase only. 
struct _chd_ph_map_item_t { cmph_uint32 f; @@ -85,7 +85,7 @@ static cmph_uint8 chd_ph_bucket_insert(chd_ph_bucket_t * buckets,chd_ph_map_item register chd_ph_map_item_t * tmp_map_item = map_items + item_idx; register chd_ph_bucket_t * bucket = buckets + tmp_map_item->bucket_num; tmp_item = items + bucket->items_list; - + for(i = 0; i < bucket->size; i++) { if(tmp_item->f == tmp_map_item->f && tmp_item->h == tmp_map_item->h) @@ -105,7 +105,7 @@ void chd_ph_bucket_destroy(chd_ph_bucket_t * buckets) free(buckets); } -static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, +static inline cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, cmph_uint32 *max_bucket_size); static chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets,chd_ph_item_t ** items, @@ -131,7 +131,7 @@ static inline double chd_ph_get_entropy(cmph_uint32 * disp_table, cmph_uint32 n, { probe_counts[disp_table[i]]++; }; - + for(i = 0; i < max_probes; i++) { if(probe_counts[i] > 0) @@ -145,9 +145,9 @@ chd_ph_config_data_t *chd_ph_config_new(void) { chd_ph_config_data_t *chd_ph; chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t)); - assert(chd_ph); + if (!chd_ph) return NULL; memset(chd_ph, 0, sizeof(chd_ph_config_data_t)); - + chd_ph->hashfunc = CMPH_HASH_JENKINS; chd_ph->cs = NULL; chd_ph->nbuckets = 0; @@ -159,7 +159,7 @@ chd_ph_config_data_t *chd_ph_config_new(void) chd_ph->keys_per_bin = 1; chd_ph->keys_per_bucket = 4; chd_ph->occup_table = 0; - + return chd_ph; } @@ -184,7 +184,7 @@ void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 1) break; //chd_ph only uses one linear hash function - chd_ph->hashfunc = *hashptr; + chd_ph->hashfunc = *hashptr; ++i, ++hashptr; } } @@ -228,24 +228,24 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_ { mapping_iterations--; if 
(chd_ph->hl) hash_state_destroy(chd_ph->hl); - chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m); + chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m); chd_ph_bucket_clean(buckets, chd_ph->nbuckets); - mph->key_source->rewind(mph->key_source->data); + mph->key_source->rewind(mph->key_source->data); for(i = 0; i < chd_ph->m; i++) { - mph->key_source->read(mph->key_source->data, &key, &keylen); + mph->key_source->read(mph->key_source->data, &key, &keylen); hash_vector(chd_ph->hl, key, keylen, hl); - + map_item = (map_items + i); g = hl[0] % chd_ph->nbuckets; map_item->f = hl[1] % chd_ph->n; map_item->h = hl[2] % (chd_ph->n - 1) + 1; map_item->bucket_num=g; - mph->key_source->dispose(mph->key_source->data, key, keylen); + mph->key_source->dispose(mph->key_source->data, key, keylen); // if(buckets[g].size == (chd_ph->keys_per_bucket << 2)) // { // DEBUGP("BUCKET = %u -- SIZE = %u -- MAXIMUM SIZE = %u\n", g, buckets[g].size, (chd_ph->keys_per_bucket << 2)); @@ -275,7 +275,7 @@ cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_ free(map_items); return 1; // SUCCESS } - + if(mapping_iterations == 0) { goto error; @@ -292,7 +292,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_ cmph_uint32 nbuckets, cmph_uint32 nitems, cmph_uint32 max_bucket_size) { chd_ph_sorted_list_t * sorted_lists = (chd_ph_sorted_list_t *) calloc(max_bucket_size + 1, sizeof(chd_ph_sorted_list_t)); - + chd_ph_bucket_t * input_buckets = (*_buckets); chd_ph_bucket_t * output_buckets; chd_ph_item_t * input_items = (*_items); @@ -319,7 +319,7 @@ chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_ // Store the buckets in a new array which is sorted by bucket sizes output_buckets = calloc(nbuckets, sizeof(chd_ph_bucket_t)); // everything is initialized with zero // non_empty_buckets = nbuckets; - + for(i = 0; i < nbuckets; i++) { bucket_size = input_buckets[i].size; @@ -338,8 +338,8 @@ 
chd_ph_sorted_list_t * chd_ph_ordering(chd_ph_bucket_t ** _buckets, chd_ph_item_ // Return the buckets sorted in new order and free the old buckets sorted in old order free(input_buckets); (*_buckets) = output_buckets; - - + + // Store the items according to the new order of buckets. output_items = (chd_ph_item_t*)calloc(nitems, sizeof(chd_ph_item_t)); position = 0; @@ -426,26 +426,26 @@ static inline cmph_uint8 place_bucket_probe(chd_ph_config_data_t *chd_ph, chd_ph } position = (cmph_uint32)((item->f + ((cmph_uint64 )item->h) * probe0_num + probe1_num) % chd_ph->n); UNSETBIT32(((cmph_uint32*)chd_ph->occup_table), position); - + // ([position/32]^=(1<<(position%32)); item++; i--; }; }; return 0; - } + } return 1; }; -static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes, +static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, cmph_uint32 max_probes, cmph_uint32 * disp_table, cmph_uint32 bucket_num, cmph_uint32 size) - + { register cmph_uint32 probe0_num, probe1_num, probe_num; probe0_num = 0; probe1_num = 0; probe_num = 0; - + while(1) { if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, bucket_num,size)) @@ -469,7 +469,7 @@ static inline cmph_uint8 place_bucket(chd_ph_config_data_t *chd_ph, chd_ph_bucke }; static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t * buckets, chd_ph_item_t *items, - cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, + cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table) { register cmph_uint32 i = 0; @@ -490,8 +490,8 @@ static inline cmph_uint8 place_buckets1(chd_ph_config_data_t *chd_ph, chd_ph_buc return 1; }; -static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, - cmph_uint32 
max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, +static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t * items, + cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table) { register cmph_uint32 i,j, non_placed_bucket; @@ -516,10 +516,10 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc { // if bucket is successfully placed remove it from list if(place_bucket_probe(chd_ph, buckets, items, probe0_num, probe1_num, curr_bucket, i)) - { + { disp_table[buckets[curr_bucket].bucket_id] = probe0_num + probe1_num * chd_ph->n; // DEBUGP("BUCKET %u PLACED --- DISPLACEMENT = %u\n", curr_bucket, disp_table[curr_bucket]); - } + } else { // DEBUGP("BUCKET %u NOT PLACED\n", curr_bucket); @@ -529,7 +529,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc #endif buckets[non_placed_bucket + sorted_lists[i].buckets_list].items_list = buckets[curr_bucket].items_list; buckets[non_placed_bucket + sorted_lists[i].buckets_list].bucket_id = buckets[curr_bucket].bucket_id; -#ifdef DEBUG +#ifdef DEBUG buckets[curr_bucket].items_list=items_list; buckets[curr_bucket].bucket_id=bucket_id; #endif @@ -557,7 +557,7 @@ static inline cmph_uint8 place_buckets2(chd_ph_config_data_t *chd_ph, chd_ph_buc }; cmph_uint8 chd_ph_searching(chd_ph_config_data_t *chd_ph, chd_ph_bucket_t *buckets, chd_ph_item_t *items , - cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, + cmph_uint32 max_bucket_size, chd_ph_sorted_list_t *sorted_lists, cmph_uint32 max_probes, cmph_uint32 * disp_table) { if(chd_ph->use_h) @@ -582,7 +582,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph, memset(chd_ph->occup_table, 0, chd_ph->n); else memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32)); - + for(bucket_size = 1; bucket_size <= 
max_bucket_size; bucket_size++) for(i = sorted_lists[bucket_size].buckets_list; i < sorted_lists[bucket_size].size + sorted_lists[bucket_size].buckets_list; i++) @@ -602,7 +602,7 @@ static inline cmph_uint8 chd_ph_check_bin_hashing(chd_ph_config_data_t *chd_ph, return 0; } (chd_ph->occup_table[position])++; - } + } else { if(GETBIT32(((cmph_uint32*)chd_ph->occup_table), position)) @@ -624,7 +624,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) cmph_t *mphf = NULL; chd_ph_data_t *chd_phf = NULL; chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data; - + register double load_factor = c; register cmph_uint8 searching_success = 0; register cmph_uint32 max_probes = 1 << 20; // default value for max_probes @@ -645,24 +645,24 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) chd_ph->m = mph->key_source->nkeys; DEBUGP("m = %u\n", chd_ph->m); - + chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1; DEBUGP("nbuckets = %u\n", chd_ph->nbuckets); - + if(load_factor < 0.5 ) { load_factor = 0.5; } - + if(load_factor >= 0.99) { load_factor = 0.99; } - + DEBUGP("load_factor = %.3f\n", load_factor); - + chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1; - + //Round the number of bins to the prime immediately above if(chd_ph->n % 2 == 0) chd_ph->n++; for(;;) @@ -670,35 +670,35 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) if(check_primality(chd_ph->n) == 1) break; chd_ph->n += 2; // just odd numbers can be primes for n > 2 - + }; - + DEBUGP("n = %u \n", chd_ph->n); if(chd_ph->keys_per_bin == 1) { space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n); } - + if(mph->verbosity) { fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound); } // We allocate the working tables - buckets = chd_ph_bucket_new(chd_ph->nbuckets); + buckets = chd_ph_bucket_new(chd_ph->nbuckets); items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t)); max_probes = 
(cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes); - + if(chd_ph->keys_per_bin == 1) chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32)); else chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8)); - + disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32)); -// +// // init_genrand(time(0)); - + while(1) { iterations --; @@ -706,12 +706,12 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) { fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n); } - + if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size)) { if (mph->verbosity) { - fprintf(stderr, "Failure in mapping step\n"); + fprintf(stderr, "Failure in mapping step\n"); } failure = 1; goto cleanup; @@ -727,15 +727,15 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) } sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size); - + if (mph->verbosity) { fprintf(stderr, "Starting searching step\n"); } - + searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table); if(searching_success) break; - + // reset occup_table if(chd_ph->keys_per_bin > 1) memset(chd_ph->occup_table, 0, chd_ph->n); @@ -757,19 +757,19 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) { if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size)) { - + DEBUGP("Error for bin packing generation"); failure = 1; goto cleanup; } } #endif - + if (mph->verbosity) { fprintf(stderr, "Starting compressing step\n"); } - + if(chd_ph->cs) { free(chd_ph->cs); @@ -777,7 +777,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t)); compressed_seq_init(chd_ph->cs); compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets); - + #ifdef CMPH_TIMING ELAPSED_TIME_IN_SECONDS(&construction_time); register double entropy = 
chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes); @@ -785,11 +785,11 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) #endif cleanup: - chd_ph_bucket_destroy(buckets); + chd_ph_bucket_destroy(buckets); free(items); free(sorted_lists); free(disp_table); - if(failure) + if(failure) { if(chd_ph->hl) { @@ -802,14 +802,14 @@ cleanup: mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf->algo = mph->algo; chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t)); - + chd_phf->cs = chd_ph->cs; chd_ph->cs = NULL; //transfer memory ownership chd_phf->hl = chd_ph->hl; chd_ph->hl = NULL; //transfer memory ownership chd_phf->n = chd_ph->n; chd_phf->nbuckets = chd_ph->nbuckets; - + mphf->data = chd_phf; mphf->size = chd_ph->n; @@ -818,12 +818,12 @@ cleanup: { fprintf(stderr, "Successfully generated minimal perfect hash function\n"); } - - #ifdef CMPH_TIMING + + #ifdef CMPH_TIMING register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8; construction_time = construction_time - construction_time_begin; fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m); - #endif + #endif return mphf; } @@ -846,19 +846,19 @@ void chd_ph_load(FILE *fd, cmph_t *mphf) nbytes = fread(buf, (size_t)buflen, (size_t)1, fd); chd_ph->hl = hash_state_load(buf, buflen); free(buf); - + nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, fd); DEBUGP("Compressed sequence structure has %u bytes\n", buflen); buf = (char *)malloc((size_t)buflen); nbytes = fread(buf, (size_t)buflen, (size_t)1, fd); - chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t)); + chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t)); compressed_seq_load(chd_ph->cs, buf, buflen); free(buf); - + // loading n and nbuckets DEBUGP("Reading n and nbuckets\n"); - nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd); - nbytes = 
fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd); + nbytes = fread(&(chd_ph->n), sizeof(cmph_uint32), (size_t)1, fd); + nbytes = fread(&(chd_ph->nbuckets), sizeof(cmph_uint32), (size_t)1, fd); } int chd_ph_dump(cmph_t *mphf, FILE *fd) @@ -867,7 +867,7 @@ int chd_ph_dump(cmph_t *mphf, FILE *fd) cmph_uint32 buflen; register size_t nbytes; chd_ph_data_t *data = (chd_ph_data_t *)mphf->data; - + __cmph_dump(mphf, fd); hash_state_dump(data->hl, &buf, &buflen); @@ -906,11 +906,11 @@ cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) register cmph_uint32 disp,position; register cmph_uint32 probe0_num,probe1_num; register cmph_uint32 f,g,h; - hash_vector(chd_ph->hl, key, keylen, hl); + hash_vector(chd_ph->hl, key, keylen, hl); g = hl[0] % chd_ph->nbuckets; f = hl[1] % chd_ph->n; h = hl[2] % (chd_ph->n-1) + 1; - + disp = compressed_seq_query(chd_ph->cs, g); probe0_num = disp % chd_ph->n; probe1_num = disp/chd_ph->n; @@ -949,10 +949,10 @@ void chd_ph_pack(cmph_t *mphf, void *packed_mphf) cmph_uint32 chd_ph_packed_size(cmph_t *mphf) { register chd_ph_data_t *data = (chd_ph_data_t *)mphf->data; - register CMPH_HASH hl_type = hash_get_type(data->hl); + register CMPH_HASH hl_type = hash_get_type(data->hl); register cmph_uint32 hash_state_pack_size = hash_state_packed_size(hl_type); register cmph_uint32 cs_pack_size = compressed_seq_packed_size(data->cs); - + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_pack_size + cs_pack_size + 3*sizeof(cmph_uint32)); } @@ -961,28 +961,25 @@ cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 { register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4; - + register cmph_uint32 * ptr = (cmph_uint32 *)(hl_ptr + hash_state_packed_size(hl_type)); register cmph_uint32 n = *ptr++; register cmph_uint32 nbuckets = *ptr++; cmph_uint32 hl[3]; - + register cmph_uint32 disp,position; register cmph_uint32 
probe0_num,probe1_num; register cmph_uint32 f,g,h; - + hash_vector_packed(hl_ptr, hl_type, key, keylen, hl); g = hl[0] % nbuckets; f = hl[1] % n; h = hl[2] % (n-1) + 1; - + disp = compressed_seq_query_packed(ptr, g); probe0_num = disp % n; probe1_num = disp/n; position = (cmph_uint32)((f + ((cmph_uint64 )h)*probe0_num + probe1_num) % n); return position; } - - - diff --git a/src/chm.c b/src/chm.c index 9cdbf41..5c416b1 100644 --- a/src/chm.c +++ b/src/chm.c @@ -21,7 +21,7 @@ chm_config_data_t *chm_config_new(void) { chm_config_data_t *chm = NULL; chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t)); - assert(chm); + if (!chm) return NULL; memset(chm, 0, sizeof(chm_config_data_t)); chm->hashfuncs[0] = CMPH_HASH_JENKINS; chm->hashfuncs[1] = CMPH_HASH_JENKINS; @@ -45,7 +45,7 @@ void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 2) break; //chm only uses two hash functions - chm->hashfuncs[i] = *hashptr; + chm->hashfuncs[i] = *hashptr; ++i, ++hashptr; } } @@ -61,7 +61,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c) chm_config_data_t *chm = (chm_config_data_t *)mph->data; chm->m = mph->key_source->nkeys; if (c == 0) c = 2.09; - chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys); + chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c); chm->graph = graph_new(chm->n, chm->m); DEBUGP("Created graph\n"); @@ -92,12 +92,12 @@ cmph_t *chm_new(cmph_config_t *mph, double c) fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } - else break; + } + else break; } if (iterations == 0) { - graph_destroy(chm->graph); + graph_destroy(chm->graph); return NULL; } @@ -120,7 +120,7 @@ cmph_t *chm_new(cmph_config_t *mph, double c) chm_traverse(chm, visited, i); } } - graph_destroy(chm->graph); + graph_destroy(chm->graph); free(visited); chm->graph = NULL; @@ -149,7 +149,7 
@@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3 graph_iterator_t it = graph_neighbors_it(chm->graph, v); cmph_uint32 neighbor = 0; SETBIT(visited,v); - + DEBUGP("Visiting vertex %u\n", v); while((neighbor = graph_next_neighbor(chm->graph, &it)) != GRAPH_NO_NEIGHBOR) { @@ -162,7 +162,7 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3 chm_traverse(chm, visited, neighbor); } } - + static int chm_gen_edges(cmph_config_t *mph) { cmph_uint32 e; @@ -170,7 +170,7 @@ static int chm_gen_edges(cmph_config_t *mph) int cycles = 0; DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]); - graph_clear_edges(chm->graph); + graph_clear_edges(chm->graph); mph->key_source->rewind(mph->key_source->data); for (e = 0; e < mph->key_source->nkeys; ++e) { @@ -181,7 +181,7 @@ static int chm_gen_edges(cmph_config_t *mph) h1 = hash(chm->hashes[0], key, keylen) % chm->n; h2 = hash(chm->hashes[1], key, keylen) % chm->n; if (h1 == h2) if (++h2 >= chm->n) h2 = 0; - if (h1 == h2) + if (h1 == h2) { if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); mph->key_source->dispose(mph->key_source->data, key, keylen); @@ -205,7 +205,7 @@ int chm_dump(cmph_t *mphf, FILE *fd) cmph_uint32 two = 2; //number of hash functions chm_data_t *data = (chm_data_t *)mphf->data; register size_t nbytes; - + __cmph_dump(mphf, fd); nbytes = fwrite(&two, sizeof(cmph_uint32), (size_t)1, fd); @@ -223,7 +223,7 @@ int chm_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd); nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd); - + nbytes = fwrite(data->g, sizeof(cmph_uint32)*data->n, (size_t)1, fd); /* #ifdef DEBUG fprintf(stderr, "G: "); @@ -260,8 +260,8 @@ void chm_load(FILE *f, cmph_t *mphf) } DEBUGP("Reading m and n\n"); - nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f); - 
nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(chm->n), sizeof(cmph_uint32), (size_t)1, f); + nbytes = fread(&(chm->m), sizeof(cmph_uint32), (size_t)1, f); chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n); nbytes = fread(chm->g, chm->n*sizeof(cmph_uint32), (size_t)1, f); @@ -272,7 +272,7 @@ void chm_load(FILE *f, cmph_t *mphf) #endif return; } - + cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) { @@ -287,7 +287,7 @@ cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) void chm_destroy(cmph_t *mphf) { chm_data_t *data = (chm_data_t *)mphf->data; - free(data->g); + free(data->g); hash_state_destroy(data->hashes[0]); hash_state_destroy(data->hashes[1]); free(data->hashes); @@ -298,7 +298,7 @@ void chm_destroy(cmph_t *mphf) /** \fn void chm_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf - * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() + * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() */ void chm_pack(cmph_t *mphf, void *packed_mphf) { @@ -332,26 +332,26 @@ void chm_pack(cmph_t *mphf, void *packed_mphf) ptr += sizeof(data->m); // packing g - memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); + memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); } /** \fn cmph_uint32 chm_packed_size(cmph_t *mphf); * \brief Return the amount of space needed to pack mphf. 
* \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 chm_packed_size(cmph_t *mphf) { chm_data_t *data = (chm_data_t *)mphf->data; - CMPH_HASH h1_type = hash_get_type(data->hashes[0]); - CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); - return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + 4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); } /** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. * \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -366,16 +366,16 @@ cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); h2_ptr += 4; - + register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); - - register cmph_uint32 n = *g_ptr++; - register cmph_uint32 m = *g_ptr++; - - register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; - register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; + + register cmph_uint32 n = *g_ptr++; + register cmph_uint32 m = *g_ptr++; + + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); if (h1 == h2 && ++h2 >= n) h2 = 0; DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m); - return (g_ptr[h1] + g_ptr[h2]) % m; + return (g_ptr[h1] + 
g_ptr[h2]) % m; } diff --git a/src/cmph.c b/src/cmph.c index b0c33bf..f460dd0 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -1,10 +1,10 @@ #include "cmph.h" #include "cmph_structs.h" #include "chm.h" -#include "bmz.h" -#include "bmz8.h" -#include "brz.h" -#include "fch.h" +#include "bmz.h" +#include "bmz8.h" +#include "brz.h" +#include "fch.h" #include "bdz.h" #include "bdz_ph.h" #include "chd_ph.h" @@ -18,18 +18,18 @@ const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", "chd", NULL }; -typedef struct +typedef struct { void *vector; - cmph_uint32 position; // access position when data is a vector + cmph_uint32 position; // access position when data is a vector } cmph_vector_t; -/** +/** * Support a vector of struct as the source of keys. * - * E.g. The keys could be the fieldB's in a vector of struct rec where + * E.g. The keys could be the fieldB's in a vector of struct rec where * struct rec is defined as: * struct rec { * fieldA; @@ -37,7 +37,7 @@ typedef struct * fieldC; * } */ -typedef struct +typedef struct { void *vector; /* Pointer to the vector of struct */ cmph_uint32 position; /* current position */ @@ -61,7 +61,7 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen) while(1) { char buf[BUFSIZ]; - char *c = fgets(buf, BUFSIZ, fd); + char *c = fgets(buf, BUFSIZ, fd); if (c == NULL) return -1; if (feof(fd)) return -1; *key = (char *)realloc(*key, *keylen + strlen(buf) + 1); @@ -156,8 +156,12 @@ static cmph_uint32 count_nlfile_keys(FILE *fd) while(1) { char buf[BUFSIZ]; - ptr = fgets(buf, BUFSIZ, fd); + ptr = fgets(buf, BUFSIZ, fd); if (feof(fd)) break; + if (ferror(fd) || ptr == NULL) { + perror("Error reading input file"); + return 0; + } if (buf[strlen(buf) - 1] != '\n') continue; ++count; } @@ -264,12 +268,12 @@ cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 stru key_source->read = key_struct_vector_read; key_source->dispose = key_vector_dispose; key_source->rewind = 
key_struct_vector_rewind; - return key_source; + return key_source; } void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source) { - cmph_io_struct_vector_destroy(key_source); + cmph_io_struct_vector_destroy(key_source); } cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys) @@ -370,7 +374,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) { - if (mph->algo == CMPH_BRZ) + if (mph->algo == CMPH_BRZ) { brz_config_set_tmp_dir(mph, tmp_dir); } @@ -379,7 +383,7 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd) { - if (mph->algo == CMPH_BRZ) + if (mph->algo == CMPH_BRZ) { brz_config_set_mphf_fd(mph, mphf_fd); } @@ -387,19 +391,19 @@ void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd) void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b) { - if (mph->algo == CMPH_BRZ) + if (mph->algo == CMPH_BRZ) { brz_config_set_b(mph, b); } - else if (mph->algo == CMPH_BDZ) + else if (mph->algo == CMPH_BDZ) { bdz_config_set_b(mph, b); } - else if (mph->algo == CMPH_CHD_PH) + else if (mph->algo == CMPH_CHD_PH) { chd_ph_config_set_b(mph, b); } - else if (mph->algo == CMPH_CHD) + else if (mph->algo == CMPH_CHD) { chd_config_set_b(mph, b); } @@ -407,11 +411,11 @@ void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b) void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin) { - if (mph->algo == CMPH_CHD_PH) + if (mph->algo == CMPH_CHD_PH) { chd_ph_config_set_keys_per_bin(mph, keys_per_bin); } - else if (mph->algo == CMPH_CHD) + else if (mph->algo == CMPH_CHD) { chd_config_set_keys_per_bin(mph, keys_per_bin); } @@ -419,7 +423,7 @@ void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin) void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability) { - if (mph->algo == CMPH_BRZ) + if 
(mph->algo == CMPH_BRZ) { brz_config_set_memory_availability(mph, memory_availability); } @@ -519,7 +523,7 @@ cmph_t *cmph_new(cmph_config_t *mph) double c = mph->c; DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]); - switch (mph->algo) + switch (mph->algo) { case CMPH_CHM: DEBUGP("Creating chm hash\n"); @@ -654,28 +658,28 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) case CMPH_CHM: return chm_search(mphf, key, keylen); case CMPH_BMZ: /* included -- Fabiano */ - DEBUGP("bmz algorithm search\n"); + DEBUGP("bmz algorithm search\n"); return bmz_search(mphf, key, keylen); case CMPH_BMZ8: /* included -- Fabiano */ - DEBUGP("bmz8 algorithm search\n"); + DEBUGP("bmz8 algorithm search\n"); return bmz8_search(mphf, key, keylen); case CMPH_BRZ: /* included -- Fabiano */ - DEBUGP("brz algorithm search\n"); + DEBUGP("brz algorithm search\n"); return brz_search(mphf, key, keylen); case CMPH_FCH: /* included -- Fabiano */ - DEBUGP("fch algorithm search\n"); + DEBUGP("fch algorithm search\n"); return fch_search(mphf, key, keylen); case CMPH_BDZ: /* included -- Fabiano */ - DEBUGP("bdz algorithm search\n"); + DEBUGP("bdz algorithm search\n"); return bdz_search(mphf, key, keylen); case CMPH_BDZ_PH: /* included -- Fabiano */ - DEBUGP("bdz_ph algorithm search\n"); + DEBUGP("bdz_ph algorithm search\n"); return bdz_ph_search(mphf, key, keylen); case CMPH_CHD_PH: /* included -- Fabiano */ - DEBUGP("chd_ph algorithm search\n"); + DEBUGP("chd_ph algorithm search\n"); return chd_ph_search(mphf, key, keylen); case CMPH_CHD: /* included -- Fabiano */ - DEBUGP("chd algorithm search\n"); + DEBUGP("chd algorithm search\n"); return chd_search(mphf, key, keylen); default: assert(0); @@ -688,7 +692,7 @@ cmph_uint32 cmph_size(cmph_t *mphf) { return mphf->size; } - + void cmph_destroy(cmph_t *mphf) { switch(mphf->algo) @@ -720,7 +724,7 @@ void cmph_destroy(cmph_t *mphf) case CMPH_CHD: /* included -- Fabiano */ chd_destroy(mphf); return; - default: + 
default: assert(0); } assert(0); @@ -731,12 +735,12 @@ void cmph_destroy(cmph_t *mphf) /** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf - * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() + * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() */ void cmph_pack(cmph_t *mphf, void *packed_mphf) { // packing algorithm type to be used in cmph.c - cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf; + cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf; *ptr++ = mphf->algo; DEBUGP("mphf->algo = %u\n", mphf->algo); switch(mphf->algo) @@ -768,7 +772,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf) case CMPH_CHD: /* included -- Fabiano */ chd_pack(mphf, ptr); break; - default: + default: assert(0); } return; @@ -778,7 +782,7 @@ void cmph_pack(cmph_t *mphf, void *packed_mphf) * \brief Return the amount of space needed to pack mphf. * \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 cmph_packed_size(cmph_t *mphf) { switch(mphf->algo) @@ -801,14 +805,14 @@ cmph_uint32 cmph_packed_size(cmph_t *mphf) return chd_ph_packed_size(mphf); case CMPH_CHD: /* included -- Fabiano */ return chd_packed_size(mphf); - default: + default: assert(0); } return 0; // FAILURE } /** cmph_uint32 cmph_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. 
* \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -838,7 +842,7 @@ cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 k return chd_ph_search_packed(++ptr, key, keylen); case CMPH_CHD: /* included -- Fabiano */ return chd_search_packed(++ptr, key, keylen); - default: + default: assert(0); } return 0; // FAILURE diff --git a/src/cmph_structs.c b/src/cmph_structs.c index bcd3da3..2c28bc3 100644 --- a/src/cmph_structs.c +++ b/src/cmph_structs.c @@ -28,7 +28,7 @@ void __cmph_dump(cmph_t *mphf, FILE *fd) nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd); nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd); } -cmph_t *__cmph_load(FILE *f) +cmph_t *__cmph_load(FILE *f) { cmph_t *mphf = NULL; cmph_uint32 i; @@ -36,7 +36,7 @@ cmph_t *__cmph_load(FILE *f) char *ptr = algo_name; CMPH_ALGO algo = CMPH_COUNT; register size_t nbytes; - + DEBUGP("Loading mphf\n"); while(1) { @@ -52,7 +52,7 @@ cmph_t *__cmph_load(FILE *f) algo = i; } } - if (algo == CMPH_COUNT) + if (algo == CMPH_COUNT) { DEBUGP("Algorithm %s not found\n", algo_name); return NULL; @@ -65,5 +65,3 @@ cmph_t *__cmph_load(FILE *f) return mphf; } - - diff --git a/src/djb2_hash.c b/src/djb2_hash.c index d3b4330..25f8220 100644 --- a/src/djb2_hash.c +++ b/src/djb2_hash.c @@ -4,6 +4,7 @@ djb2_state_t *djb2_state_new() { djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t)); + if (!state) return NULL; state->hashfunc = CMPH_HASH_DJB2; return state; } @@ -18,7 +19,7 @@ cmph_uint32 djb2_hash(djb2_state_t *state, const char *k, cmph_uint32 keylen) register cmph_uint32 hash = 5381; const unsigned char *ptr = (unsigned char *)k; cmph_uint32 i = 0; - while (i < keylen) + while (i < keylen) { hash = hash*33 ^ *ptr; ++ptr, ++i; diff --git a/src/fch.c b/src/fch.c index 67b68fb..9ca4e03 100644 --- a/src/fch.c +++ b/src/fch.c @@ -23,7 +23,7 @@ fch_config_data_t
*fch_config_new() { fch_config_data_t *fch; fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t)); - assert(fch); + if (!fch) return NULL; memset(fch, 0, sizeof(fch_config_data_t)); fch->hashfuncs[0] = CMPH_HASH_JENKINS; fch->hashfuncs[1] = CMPH_HASH_JENKINS; @@ -50,7 +50,7 @@ void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 2) break; //fch only uses two hash functions - fch->hashfuncs[i] = *hashptr; + fch->hashfuncs[i] = *hashptr; ++i, ++hashptr; } } @@ -88,36 +88,36 @@ static fch_buckets_t * mapping(cmph_config_t *mph) fch_buckets_t *buckets = NULL; fch_config_data_t *fch = (fch_config_data_t *)mph->data; if (fch->h1) hash_state_destroy(fch->h1); - fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m); + fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m); fch->b = fch_calc_b(fch->c, fch->m); fch->p1 = fch_calc_p1(fch->m); fch->p2 = fch_calc_p2(fch->b); //DEBUGP("b:%u p1:%f p2:%f\n", fch->b, fch->p1, fch->p2); buckets = fch_buckets_new(fch->b); - mph->key_source->rewind(mph->key_source->data); + mph->key_source->rewind(mph->key_source->data); for(i = 0; i < fch->m; i++) { cmph_uint32 h1, keylen; char *key = NULL; - mph->key_source->read(mph->key_source->data, &key, &keylen); + mph->key_source->read(mph->key_source->data, &key, &keylen); h1 = hash(fch->h1, key, keylen) % fch->m; h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1); fch_buckets_insert(buckets, h1, key, keylen); key = NULL; // transger memory ownership - + } - return buckets; + return buckets; } -// returns the buckets indexes sorted by their sizes. +// returns the buckets indexes sorted by their sizes. 
static cmph_uint32 * ordering(fch_buckets_t * buckets) { return fch_buckets_get_indexes_sorted_by_size(buckets); } -/* Check whether function h2 causes collisions among the keys of each bucket */ +/* Check whether function h2 causes collisions among the keys of each bucket */ static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t * buckets, cmph_uint32 *sorted_indexes) { //cmph_uint32 max_size = fch_buckets_get_max_size(buckets); @@ -146,7 +146,7 @@ static cmph_uint8 check_for_collisions_h2(fch_config_data_t *fch, fch_buckets_t } static void permut(cmph_uint32 * vector, cmph_uint32 n) -{ +{ cmph_uint32 i, j, b; for (i = 0; i < n; i++) { j = (cmph_uint32) rand() % n; @@ -179,12 +179,12 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph { map_table[random_table[i]] = i; } - do { + do { if (fch->h2) hash_state_destroy(fch->h2); - fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m); + fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m); restart = check_for_collisions_h2(fch, buckets, sorted_indexes); filled_count = 0; - if (!restart) + if (!restart) { searching_iterations++; iteration_to_generate_h2 = 0; //DEBUGP("searching_iterations: %u\n", searching_iterations); @@ -192,7 +192,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph else { iteration_to_generate_h2++; //DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2); - } + } for(i = 0; (i < nbuckets) && !restart; i++) { cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]); if (bucketsize == 0) @@ -204,8 +204,8 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph for(z = 0; (z < (fch->m - filled_count)) && restart; z++) { char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX); cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX); - cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m; - counter = 0; + cmph_uint32 h2 = 
hash(fch->h2, key, keylen) % fch->m; + counter = 0; restart = 0; // false fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m; //DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]); @@ -217,7 +217,7 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph h2 = hash(fch->h2, key, keylen) % fch->m; index = (h2 + fch->g[sorted_indexes[i]]) % fch->m; //DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize); - if (map_table[index] >= filled_count) { + if (map_table[index] >= filled_count) { cmph_uint32 y = map_table[index]; cmph_uint32 ry = random_table[y]; random_table[y] = random_table[filled_count]; @@ -225,19 +225,19 @@ static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph map_table[random_table[y]] = y; map_table[random_table[filled_count]] = filled_count; filled_count++; - counter ++; + counter ++; } - else { + else { restart = 1; // true filled_count = filled_count - counter; - counter = 0; + counter = 0; break; } j = (j + 1) % bucketsize; - } while(j % bucketsize != INDEX); + } while(j % bucketsize != INDEX); } //getchar(); - } + } } while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000)); free(map_table); free(random_table); @@ -264,7 +264,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c) fch->h2 = NULL; fch->g = NULL; do - { + { if (mph->verbosity) { fprintf(stderr, "Entering mapping step for mph creation of %u keys\n", fch->m); @@ -283,7 +283,7 @@ cmph_t *fch_new(cmph_config_t *mph, double c) } restart_mapping = searching(fch, buckets, sorted_indexes); iterations--; - + } while(restart_mapping && iterations > 0); if (buckets) fch_buckets_destroy(buckets); if (sorted_indexes) free (sorted_indexes); @@ -317,7 +317,7 @@ int fch_dump(cmph_t *mphf, FILE *fd) char *buf = NULL; cmph_uint32 buflen; register size_t nbytes; - + fch_data_t *data = (fch_data_t *)mphf->data; __cmph_dump(mphf, fd); @@ 
-365,7 +365,7 @@ void fch_load(FILE *f, cmph_t *mphf) nbytes = fread(buf, (size_t)buflen, (size_t)1, f); fch->h1 = hash_state_load(buf, buflen); free(buf); - + //DEBUGP("Loading fch mphf\n"); mphf->data = fch; //DEBUGP("Reading h2\n"); @@ -376,8 +376,8 @@ void fch_load(FILE *f, cmph_t *mphf) nbytes = fread(buf, (size_t)buflen, (size_t)1, f); fch->h2 = hash_state_load(buf, buflen); free(buf); - - + + //DEBUGP("Reading m and n\n"); nbytes = fread(&(fch->m), sizeof(cmph_uint32), (size_t)1, f); nbytes = fread(&(fch->c), sizeof(double), (size_t)1, f); @@ -418,7 +418,7 @@ void fch_destroy(cmph_t *mphf) /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf - * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() + * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() */ void fch_pack(cmph_t *mphf, void *packed_mphf) { @@ -450,37 +450,37 @@ void fch_pack(cmph_t *mphf, void *packed_mphf) // packing b *((cmph_uint32 *) ptr) = data->b; ptr += sizeof(data->b); - + // packing p1 - *((cmph_uint64 *)ptr) = (cmph_uint64)data->p1; + *((cmph_uint64 *)ptr) = (cmph_uint64)data->p1; ptr += sizeof(data->p1); // packing p2 - *((cmph_uint64 *)ptr) = (cmph_uint64)data->p2; + *((cmph_uint64 *)ptr) = (cmph_uint64)data->p2; ptr += sizeof(data->p2); // packing g - memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b)); + memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b)); } /** \fn cmph_uint32 fch_packed_size(cmph_t *mphf); * \brief Return the amount of space needed to pack mphf. 
* \param mphf pointer to a mphf * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 fch_packed_size(cmph_t *mphf) { fch_data_t *data = (fch_data_t *)mphf->data; - CMPH_HASH h1_type = hash_get_type(data->h1); - CMPH_HASH h2_type = hash_get_type(data->h2); + CMPH_HASH h1_type = hash_get_type(data->h1); + CMPH_HASH h2_type = hash_get_type(data->h2); - return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + 4*sizeof(cmph_uint32) + 2*sizeof(double) + sizeof(cmph_uint32)*(data->b)); } /** cmph_uint32 fch_search(void *packed_mphf, const char *key, cmph_uint32 keylen); - * \brief Use the packed mphf to do a search. + * \brief Use the packed mphf to do a search. * \param packed_mphf pointer to the packed mphf * \param key key to be hashed * \param keylen key legth in bytes @@ -495,12 +495,12 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); h2_ptr += 4; - - register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); - - register cmph_uint32 m = *g_ptr++; - register cmph_uint32 b = *g_ptr++; + register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); + + register cmph_uint32 m = *g_ptr++; + + register cmph_uint32 b = *g_ptr++; register double p1 = (double)(*((cmph_uint64 *)g_ptr)); g_ptr += 2; @@ -508,10 +508,9 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke register double p2 = (double)(*((cmph_uint64 *)g_ptr)); g_ptr += 2; - register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, 
key, keylen) % m; h1 = mixh10h11h12 (b, p1, p2, h1); return (h2 + g_ptr[h1]) % m; } - diff --git a/src/fch_buckets.c b/src/fch_buckets.c index a588f14..0c11051 100644 --- a/src/fch_buckets.c +++ b/src/fch_buckets.c @@ -20,7 +20,7 @@ typedef struct __fch_bucket_t -static void fch_bucket_new(fch_bucket_t *bucket) +static void fch_bucket_new(fch_bucket_t *bucket) { assert(bucket); bucket->size = 0; @@ -109,16 +109,16 @@ struct __fch_buckets_t { fch_bucket_t * values; cmph_uint32 nbuckets, max_size; - + }; fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets) { cmph_uint32 i; fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t)); - assert(buckets); + if (!buckets) return NULL; buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t)); - for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i); + for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i); assert(buckets->values); buckets->nbuckets = nbuckets; buckets->max_size = 0; @@ -135,7 +135,7 @@ void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, { assert(index < buckets->nbuckets); fch_bucket_insert(buckets->values + index, key, length); - if (fch_bucket_size(buckets->values + index) > buckets->max_size) + if (fch_bucket_size(buckets->values + index) > buckets->max_size) { buckets->max_size = fch_bucket_size(buckets->values + index); } @@ -170,16 +170,16 @@ cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets) return buckets->nbuckets; } -cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets) +cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets) { cmph_uint32 i = 0; cmph_uint32 sum = 0, value; cmph_uint32 *nbuckets_size = (cmph_uint32 *) calloc((size_t)buckets->max_size + 1, sizeof(cmph_uint32)); cmph_uint32 * sorted_indexes = (cmph_uint32 *) calloc((size_t)buckets->nbuckets, sizeof(cmph_uint32)); - + // collect how many buckets for each size. 
for(i = 0; i < buckets->nbuckets; i++) nbuckets_size[fch_bucket_size(buckets->values + i)] ++; - + // calculating offset considering a decreasing order of buckets size. value = nbuckets_size[buckets->max_size]; nbuckets_size[buckets->max_size] = sum; @@ -188,13 +188,13 @@ cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets) sum += value; value = nbuckets_size[i]; nbuckets_size[i] = sum; - + } - for(i = 0; i < buckets->nbuckets; i++) + for(i = 0; i < buckets->nbuckets; i++) { sorted_indexes[nbuckets_size[fch_bucket_size(buckets->values + i)]] = (cmph_uint32)i; nbuckets_size[fch_bucket_size(buckets->values + i)] ++; - } + } free(nbuckets_size); return sorted_indexes; } @@ -208,7 +208,7 @@ void fch_buckets_print(fch_buckets_t * buckets) void fch_buckets_destroy(fch_buckets_t * buckets) { cmph_uint32 i; - for (i = 0; i < buckets->nbuckets; i++) fch_bucket_destroy(buckets->values + i); + for (i = 0; i < buckets->nbuckets; i++) fch_bucket_destroy(buckets->values + i); free(buckets->values); free(buckets); } diff --git a/src/fnv_hash.c b/src/fnv_hash.c index aeaca8f..0ef1f48 100644 --- a/src/fnv_hash.c +++ b/src/fnv_hash.c @@ -4,6 +4,7 @@ fnv_state_t *fnv_state_new() { fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t)); + if (!state) return NULL; state->hashfunc = CMPH_HASH_FNV; return state; } @@ -15,13 +16,13 @@ void fnv_state_destroy(fnv_state_t *state) cmph_uint32 fnv_hash(fnv_state_t *state, const char *k, cmph_uint32 keylen) { - const unsigned char *bp = (const unsigned char *)k; - const unsigned char *be = bp + keylen; - static unsigned int hval = 0; + const unsigned char *bp = (const unsigned char *)k; + const unsigned char *be = bp + keylen; + static unsigned int hval = 0; - while (bp < be) + while (bp < be) { - + //hval *= 0x01000193; good for non-gcc compiler hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); //good for gcc @@ -41,6 +42,7 @@ void fnv_state_dump(fnv_state_t *state, char **buf, 
cmph_uint32 *buflen) fnv_state_t * fnv_state_copy(fnv_state_t *src_state) { fnv_state_t *dest_state = (fnv_state_t *)malloc(sizeof(fnv_state_t)); + if (!dest_state) return NULL; dest_state->hashfunc = src_state->hashfunc; return dest_state; } diff --git a/src/graph.c b/src/graph.c index 2e9ddb7..97737ad 100644 --- a/src/graph.c +++ b/src/graph.c @@ -77,7 +77,7 @@ void graph_print(graph_t *g) printf("%u -> %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]); } } - + } return; } @@ -130,7 +130,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2) DEBUGP("Deleting edge point %u %u\n", v1, v2); e = g->first[v1]; - if (check_edge(g, e, v1, v2)) + if (check_edge(g, e, v1, v2)) { g->first[v1] = g->next[e]; //g->edges[e] = EMPTY; @@ -151,7 +151,7 @@ static void del_edge_point(graph_t *g, cmph_uint32 v1, cmph_uint32 v2) DEBUGP("Deleted\n"); } - + void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2) { g->shrinking = 1; @@ -163,7 +163,7 @@ void graph_clear_edges(graph_t *g) { cmph_uint32 i; for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY; - for (i = 0; i < g->nedges*2; ++i) + for (i = 0; i < g->nedges*2; ++i) { g->edges[i] = EMPTY; g->next[i] = EMPTY; @@ -178,7 +178,7 @@ static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *delet cmph_uint8 found = 0; DEBUGP("Checking degree of vertex %u connected to edge %u\n", v, edge); if (edge == EMPTY) return 0; - else if (!(GETBIT(deleted, abs_edge(edge, 0)))) + else if (!(GETBIT(deleted, abs_edge(edge, 0)))) { found = 1; *e = edge; @@ -206,17 +206,17 @@ static void cyclic_del_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *deleted) degree1 = find_degree1_edge(g, v1, deleted, &e); if (!degree1) return; - while(1) + while(1) { DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]); SETBIT(deleted, abs_edge(e, 0)); - + v2 = g->edges[abs_edge(e, 0)]; if (v2 == v1) v2 = g->edges[abs_edge(e, 1)]; - DEBUGP("Checking if second endpoint %u has 
degree 1\n", v2); + DEBUGP("Checking if second endpoint %u has degree 1\n", v2); degree1 = find_degree1_edge(g, v2, deleted, &e); - if (degree1) + if (degree1) { DEBUGP("Inspecting vertex %u\n", v2); v1 = v2; @@ -240,7 +240,7 @@ int graph_is_cyclic(graph_t *g) } for (i = 0; i < g->nedges; ++i) { - if (!(GETBIT(deleted, i))) + if (!(GETBIT(deleted, i))) { DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]); free(deleted); @@ -275,15 +275,15 @@ void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/ for (i = 0; i < g->nedges; ++i) { - if (!(GETBIT(deleted,i))) + if (!(GETBIT(deleted,i))) { DEBUGP("Edge %u %u->%u belongs to the 2-core\n", i, g->edges[i], g->edges[i + g->nedges]); - if(!(GETBIT(g->critical_nodes,g->edges[i]))) + if(!(GETBIT(g->critical_nodes,g->edges[i]))) { g->ncritical_nodes ++; SETBIT(g->critical_nodes,g->edges[i]); } - if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges]))) + if(!(GETBIT(g->critical_nodes,g->edges[i + g->nedges]))) { g->ncritical_nodes ++; SETBIT(g->critical_nodes,g->edges[i + g->nedges]); @@ -328,11 +328,9 @@ graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v) cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it) { cmph_uint32 ret; - if(it->edge == EMPTY) return GRAPH_NO_NEIGHBOR; + if(it->edge == EMPTY) return GRAPH_NO_NEIGHBOR; if (g->edges[it->edge] == it->vertex) ret = g->edges[it->edge + g->nedges]; else ret = g->edges[it->edge]; it->edge = g->next[it->edge]; return ret; } - - diff --git a/src/hash.c b/src/hash.c index 7ab0b04..aa8c95f 100644 --- a/src/hash.c +++ b/src/hash.c @@ -133,7 +133,7 @@ void hash_state_destroy(hash_state_t *state) * \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. * \param state points to the hash function * \param hash_packed pointer to the contiguous memory area used to store the hash function. 
The size of hash_packed must be at least hash_state_packed_size() - * + * * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. * However, the hash function type must be packed outside. */ @@ -142,20 +142,20 @@ void hash_state_pack(hash_state_t *state, void *hash_packed) switch (state->hashfunc) { case CMPH_HASH_JENKINS: - // pack the jenkins hash function + // pack the jenkins hash function jenkins_state_pack((jenkins_state_t *)state, hash_packed); break; default: assert(0); } - return; + return; } /** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) * \brief Return the amount of space needed to pack a hash function. * \param hashfunc function type * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) { cmph_uint32 size = 0; @@ -197,7 +197,7 @@ cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cm * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. 
*/ void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) -{ +{ switch (hashfunc) { case CMPH_HASH_JENKINS: diff --git a/src/hashtree.c b/src/hashtree.c index 2f3567e..1bfd852 100644 --- a/src/hashtree.c +++ b/src/hashtree.c @@ -41,7 +41,7 @@ void hashtree_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) while(*hashptr != CMPH_HASH_COUNT) { if (i >= 3) break; //hashtree only uses three hash functions - hashtree->hashfuncs[i] = *hashptr; + hashtree->hashfuncs[i] = *hashptr; ++i, ++hashptr; } } @@ -55,8 +55,8 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c) cmph_uint32 iterations = 20; cmph_uint8 *visited = NULL; hashtree_config_data_t *hashtree = (hashtree_config_data_t *)mph->data; - hashtree->m = mph->key_source->nkeys; - hashtree->n = ceil(c * mph->key_source->nkeys); + hashtree->m = mph->key_source->nkeys; + hashtree->n = ceil(c * mph->key_source->nkeys); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", hashtree->m, hashtree->n, c); hashtree->graph = graph_new(hashtree->n, hashtree->m); DEBUGP("Created graph\n"); @@ -87,12 +87,12 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c) fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations); } if (iterations == 0) break; - } - else break; + } + else break; } if (iterations == 0) { - graph_destroy(hashtree->graph); + graph_destroy(hashtree->graph); return NULL; } @@ -115,7 +115,7 @@ cmph_t *hashtree_new(cmph_config_t *mph, double c) hashtree_traverse(hashtree, visited, i); } } - graph_destroy(hashtree->graph); + graph_destroy(hashtree->graph); free(visited); hashtree->graph = NULL; @@ -144,7 +144,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi graph_iterator_t it = graph_neighbors_it(hashtree->graph, v); cmph_uint32 neighbor = 0; SETBIT(visited,v); - + DEBUGP("Visiting vertex %u\n", v); while((neighbor = graph_next_neighbor(hashtree->graph, &it)) != 
GRAPH_NO_NEIGHBOR) { @@ -157,7 +157,7 @@ static void hashtree_traverse(hashtree_config_data_t *hashtree, cmph_uint8 *visi hashtree_traverse(hashtree, visited, neighbor); } } - + static int hashtree_gen_edges(cmph_config_t *mph) { cmph_uint32 e; @@ -165,7 +165,7 @@ static int hashtree_gen_edges(cmph_config_t *mph) int cycles = 0; DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", hashtree->n, cmph_hash_names[hashtree->hashfuncs[0]], cmph_hash_names[hashtree->hashfuncs[1]]); - graph_clear_edges(hashtree->graph); + graph_clear_edges(hashtree->graph); mph->key_source->rewind(mph->key_source->data); for (e = 0; e < mph->key_source->nkeys; ++e) { @@ -176,7 +176,7 @@ static int hashtree_gen_edges(cmph_config_t *mph) h1 = hash(hashtree->hashes[0], key, keylen) % hashtree->n; h2 = hash(hashtree->hashes[1], key, keylen) % hashtree->n; if (h1 == h2) if (++h2 >= hashtree->n) h2 = 0; - if (h1 == h2) + if (h1 == h2) { if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); mph->key_source->dispose(mph->key_source->data, key, keylen); @@ -216,7 +216,7 @@ int hashtree_dump(cmph_t *mphf, FILE *fd) fwrite(&(data->n), sizeof(cmph_uint32), 1, fd); fwrite(&(data->m), sizeof(cmph_uint32), 1, fd); - + fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd); #ifdef DEBUG fprintf(stderr, "G: "); @@ -253,8 +253,8 @@ void hashtree_load(FILE *f, cmph_t *mphf) } DEBUGP("Reading m and n\n"); - fread(&(hashtree->n), sizeof(cmph_uint32), 1, f); - fread(&(hashtree->m), sizeof(cmph_uint32), 1, f); + fread(&(hashtree->n), sizeof(cmph_uint32), 1, f); + fread(&(hashtree->m), sizeof(cmph_uint32), 1, f); hashtree->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*hashtree->n); fread(hashtree->g, hashtree->n*sizeof(cmph_uint32), 1, f); @@ -265,7 +265,7 @@ void hashtree_load(FILE *f, cmph_t *mphf) #endif return; } - + cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) { @@ -280,7 +280,7 @@ cmph_uint32 hashtree_search(cmph_t *mphf, const char *key, 
cmph_uint32 keylen) void hashtree_destroy(cmph_t *mphf) { hashtree_data_t *data = (hashtree_data_t *)mphf->data; - free(data->g); + free(data->g); hash_state_destroy(data->hashes[0]); hash_state_destroy(data->hashes[1]); free(data->hashes); diff --git a/src/jenkins_hash.c b/src/jenkins_hash.c index 65cdff9..d540216 100644 --- a/src/jenkins_hash.c +++ b/src/jenkins_hash.c @@ -28,16 +28,16 @@ have at least 1/4 probability of changing. * If mix() is run forward, every bit of c will change between 1/3 and 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) - mix() was built out of 36 single-cycle latency instructions in a + mix() was built out of 36 single-cycle latency instructions in a structure that could supported 2x parallelism, like so: - a -= b; + a -= b; a -= c; x = (c>>13); b -= c; a ^= x; b -= a; x = (a<<8); c -= a; b ^= x; c -= b; x = (b>>13); ... - Unfortunately, superscalar Pentiums and Sparcs can't take advantage + Unfortunately, superscalar Pentiums and Sparcs can't take advantage of that parallelism. They've also turned some of those single-cycle latency instructions into multi-cycle latency instructions. Still, this is the fastest good hash I could find. There were about 2^^68 @@ -87,6 +87,7 @@ acceptable. Do NOT use for cryptographic purposes. 
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table { jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); + if (!state) return NULL; DEBUGP("Initializing jenkins hash\n"); state->seed = ((cmph_uint32)rand() % size); return state; @@ -121,28 +122,28 @@ static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_u hashes[2] += length; switch(len) /* all the case statements fall through */ { - case 11: + case 11: hashes[2] +=((cmph_uint32)k[10]<<24); - case 10: + case 10: hashes[2] +=((cmph_uint32)k[9]<<16); - case 9 : + case 9 : hashes[2] +=((cmph_uint32)k[8]<<8); /* the first byte of hashes[2] is reserved for the length */ - case 8 : + case 8 : hashes[1] +=((cmph_uint32)k[7]<<24); - case 7 : + case 7 : hashes[1] +=((cmph_uint32)k[6]<<16); - case 6 : + case 6 : hashes[1] +=((cmph_uint32)k[5]<<8); case 5 : hashes[1] +=(cmph_uint8) k[4]; - case 4 : + case 4 : hashes[0] +=((cmph_uint32)k[3]<<24); - case 3 : + case 3 : hashes[0] +=((cmph_uint32)k[2]<<16); - case 2 : + case 2 : hashes[0] +=((cmph_uint32)k[1]<<8); - case 1 : + case 1 : hashes[0] +=(cmph_uint8)k[0]; /* case 0: nothing left to add */ } @@ -158,13 +159,13 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl /* cmph_uint32 a, b, c; cmph_uint32 len, length; - // Set up the internal state + // Set up the internal state length = keylen; len = length; - a = b = 0x9e3779b9; // the golden ratio; an arbitrary value - c = state->seed; // the previous hash value - seed in our case + a = b = 0x9e3779b9; // the golden ratio; an arbitrary value + c = state->seed; // the previous hash value - seed in our case - // handle most of the key + // handle most of the key while (len >= 12) { a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); @@ -176,37 +177,37 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl // handle the last 11 bytes c += length; - switch(len) /// 
all the case statements fall through + switch(len) /// all the case statements fall through { - case 11: + case 11: c +=((cmph_uint32)k[10]<<24); - case 10: + case 10: c +=((cmph_uint32)k[9]<<16); - case 9 : + case 9 : c +=((cmph_uint32)k[8]<<8); - // the first byte of c is reserved for the length - case 8 : + // the first byte of c is reserved for the length + case 8 : b +=((cmph_uint32)k[7]<<24); - case 7 : + case 7 : b +=((cmph_uint32)k[6]<<16); - case 6 : + case 6 : b +=((cmph_uint32)k[5]<<8); - case 5 : + case 5 : b +=k[4]; - case 4 : + case 4 : a +=((cmph_uint32)k[3]<<24); - case 3 : + case 3 : a +=((cmph_uint32)k[2]<<16); - case 2 : + case 2 : a +=((cmph_uint32)k[1]<<8); - case 1 : + case 1 : a +=k[0]; - // case 0: nothing left to add + // case 0: nothing left to add } mix(a,b,c); - /// report the result + /// report the result return c; */ @@ -221,7 +222,7 @@ void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen) { *buflen = sizeof(cmph_uint32); *buf = (char *)malloc(sizeof(cmph_uint32)); - if (!*buf) + if (!*buf) { *buflen = UINT_MAX; return; @@ -252,7 +253,7 @@ jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen) /** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed); * \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed. * \param state points to the jenkins function - * \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() + * \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. 
The size of jenkins_packed must be at least jenkins_state_packed_size() */ void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed) { @@ -265,7 +266,7 @@ void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed) /** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state); * \brief Return the amount of space needed to pack a jenkins function. * \return the size of the packed function or zero for failures - */ + */ cmph_uint32 jenkins_state_packed_size(void) { return sizeof(cmph_uint32); diff --git a/src/linear_string_map.c b/src/linear_string_map.c index 4390c5b..85f8d21 100644 --- a/src/linear_string_map.c +++ b/src/linear_string_map.c @@ -12,6 +12,7 @@ struct __linear_string_map_t { lsmap_t *lsmap_new() { lsmap_t* lsmap = (lsmap_t*)malloc(sizeof(lsmap_t)); + if (!lsmap) return NULL; lsmap->key = "dummy node"; lsmap->next = NULL; return lsmap; @@ -42,7 +43,7 @@ void* lsmap_search(lsmap_t *lsmap, const char *key) { } return NULL; } - + void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) { while (lsmap->next != NULL) { f(lsmap->key); @@ -65,4 +66,3 @@ void lsmap_destroy(lsmap_t *lsmap) { } free(lsmap); } - diff --git a/src/main.c b/src/main.c index f739b32..95a75c5 100644 --- a/src/main.c +++ b/src/main.c @@ -22,13 +22,13 @@ void usage(const char *prg) { - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); } void usage_long(const char *prg) { cmph_uint32 i; - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a 
algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); - fprintf(stderr, "Minimum perfect hashing tool\n\n"); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c algorithm_dependent_value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b algorithm_dependent_value] [-t keys_per_bin] [-d tmp_dir] [-m file.mph] keysfile\n", prg); + fprintf(stderr, "Minimum perfect hashing tool\n\n"); fprintf(stderr, " -h\t print this help message\n"); fprintf(stderr, " -c\t c value determines:\n"); fprintf(stderr, " \t * the number of vertices in the graph for the algorithms BMZ and CHM\n"); @@ -57,7 +57,7 @@ void usage_long(const char *prg) fprintf(stderr, " \t and its value should be an integer in the range [1,32]. Default is 4. The\n"); fprintf(stderr, " \t larger is this value, the slower is the construction of the functions.\n"); fprintf(stderr, " \t This parameter has no effect for other algorithms.\n\n"); - fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n"); + fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function. A t-perfect\n"); fprintf(stderr, " \t hash function allows at most t collisions in a given bin. This parameter applies\n"); fprintf(stderr, " \t only to the CHD and CHD_PH algorithms. Its value should be an integer in the\n"); fprintf(stderr, " \t range [1,128]. Defaul is 1\n"); @@ -182,7 +182,7 @@ int main(int argc, char **argv) break; } } - if (!valid) + if (!valid) { fprintf(stderr, "Invalid mph algorithm: %s. 
It is not available in version %s\n", optarg, VERSION); return -1; @@ -204,7 +204,7 @@ int main(int argc, char **argv) break; } } - if (!valid) + if (!valid) { fprintf(stderr, "Invalid hash function: %s\n", optarg); return -1; @@ -223,7 +223,7 @@ int main(int argc, char **argv) return 1; } keys_file = argv[optind]; - + if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL); srand(seed); int ret = 0; @@ -232,7 +232,7 @@ int main(int argc, char **argv) mphf_file = (char *)malloc(strlen(keys_file) + 5); memcpy(mphf_file, keys_file, strlen(keys_file)); memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5); - } + } keys_fd = fopen(keys_file, "r"); @@ -258,7 +258,7 @@ int main(int argc, char **argv) cmph_config_set_memory_availability(config, memory_availability); cmph_config_set_b(config, b); cmph_config_set_keys_per_bin(config, keys_per_bin); - + //if((mph_algo == CMPH_BMZ || mph_algo == CMPH_BRZ) && c >= 2.0) c=1.15; if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15; if (c != 0) cmph_config_set_graphsize(config, c); @@ -279,8 +279,8 @@ int main(int argc, char **argv) free(mphf_file); return -1; } - cmph_dump(mphf, mphf_fd); - cmph_destroy(mphf); + cmph_dump(mphf, mphf_fd); + cmph_destroy(mphf); fclose(mphf_fd); } else @@ -329,7 +329,7 @@ int main(int argc, char **argv) } source->dispose(source->data, buf, buflen); } - + cmph_destroy(mphf); free(hashtable); } @@ -338,5 +338,5 @@ int main(int argc, char **argv) free(tmp_dir); cmph_io_nlfile_adapter_destroy(source); return ret; - + } diff --git a/src/sdbm_hash.c b/src/sdbm_hash.c index 2f706c9..3a052fd 100644 --- a/src/sdbm_hash.c +++ b/src/sdbm_hash.c @@ -4,6 +4,7 @@ sdbm_state_t *sdbm_state_new() { sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t)); + if (!state) return NULL; state->hashfunc = CMPH_HASH_SDBM; return state; } diff --git a/src/vqueue.c b/src/vqueue.c index 0619dd7..5c90ee0 100644 --- a/src/vqueue.c +++ b/src/vqueue.c @@ -12,7 +12,7 @@ vqueue_t * vqueue_new(cmph_uint32 capacity) { size_t 
capacity_plus_one = capacity + 1; vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t)); - assert(q); + if (!q) return NULL; q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32)); q->beg = q->end = 0; q->capacity = (cmph_uint32) capacity_plus_one; @@ -43,7 +43,7 @@ void vqueue_print(vqueue_t * q) cmph_uint32 i; for (i = q->beg; i != q->end; i = (i + 1)%q->capacity) fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]); -} +} void vqueue_destroy(vqueue_t *q) { diff --git a/src/vstack.c b/src/vstack.c index 96f5380..8791550 100644 --- a/src/vstack.c +++ b/src/vstack.c @@ -76,4 +76,3 @@ void vstack_reserve(vstack_t *stack, cmph_uint32 size) DEBUGP("Increased\n"); } } -