From 39e68583d3952b9392af1fd19b5d1d3d6a0ef7f9 Mon Sep 17 00:00:00 2001 From: fc_botelho Date: Sun, 23 Mar 2008 00:46:34 +0000 Subject: [PATCH] *** empty log message *** --- src/Makefile.am | 4 +- src/bdz.c | 599 +++++++++++++++++++++++++++++++++++++ src/bdz.h | 19 ++ src/bdz_gen_lookup_table.c | 33 ++ src/bdz_structs.h | 36 +++ src/bitbool.c | 1 + src/bitbool.h | 15 +- src/bmz.c | 8 +- src/bmz8.c | 8 +- src/cmph | 131 -------- src/cmph.c | 43 ++- src/cmph_types.h | 5 +- src/hash.c | 66 +--- src/hash.h | 19 ++ src/hash_state.h | 6 - src/jenkins_hash.c | 53 ++++ src/jenkins_hash.h | 16 + src/libcmph.la | 35 --- 18 files changed, 851 insertions(+), 246 deletions(-) create mode 100755 src/bdz.c create mode 100755 src/bdz.h create mode 100755 src/bdz_gen_lookup_table.c create mode 100755 src/bdz_structs.h delete mode 100755 src/cmph delete mode 100644 src/libcmph.la diff --git a/src/Makefile.am b/src/Makefile.am index 3140214..ab7befe 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -6,9 +6,6 @@ libcmph_la_SOURCES = debug.h\ cmph_types.h\ hash.h hash_state.h hash.c\ jenkins_hash.h jenkins_hash.c\ - djb2_hash.h djb2_hash.c\ - sdbm_hash.h sdbm_hash.c\ - fnv_hash.h fnv_hash.c\ vstack.h vstack.c\ vqueue.h vqueue.c\ graph.h graph.c\ @@ -17,6 +14,7 @@ libcmph_la_SOURCES = debug.h\ chm.h chm_structs.h chm.c\ bmz.h bmz_structs.h bmz.c\ bmz8.h bmz8_structs.h bmz8.c\ + bdz.h bdz_structs.h bdz.c\ buffer_manager.h buffer_manager.c\ buffer_entry.h buffer_entry.c\ brz.h brz_structs.h brz.c\ diff --git a/src/bdz.c b/src/bdz.c new file mode 100755 index 0000000..b01b8f4 --- /dev/null +++ b/src/bdz.c @@ -0,0 +1,599 @@ +#include "bdz.h" +#include "cmph_structs.h" +#include "bdz_structs.h" +#include "hash.h" +#include "bitbool.h" + +#include +#include +#include +#include +#include +//#define DEBUG +#include "debug.h" +#define UNASSIGNED 3 +#define NULL_EDGE 0xffffffff + +//cmph_uint32 ngrafos = 0; +//cmph_uint32 ngrafos_aciclicos = 0; +// table used for looking up the number of 
assigned vertices a 8-bit integer +const cmph_uint8 bdz_lookup_table[] = +{ +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0 +}; + +typedef struct +{ + cmph_uint32 vertices[3]; + cmph_uint32 next_edges[3]; +}bdz_edge_t; + +typedef cmph_uint32 * bdz_queue_t; + +static void bdz_alloc_queue(bdz_queue_t * queuep, cmph_uint32 nedges) +{ + (*queuep)=malloc(nedges*sizeof(cmph_uint32)); +}; +static void bdz_free_queue(bdz_queue_t * queue) +{ + free(*queue); +}; + +typedef struct +{ + cmph_uint32 nedges; + bdz_edge_t * edges; + cmph_uint32 * first_edge; + cmph_uint8 * vert_degree; +}bdz_graph3_t; + + +static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices) +{ + graph3->edges=malloc(nedges*sizeof(bdz_edge_t)); + graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32)); + graph3->vert_degree=malloc(nvertices); +}; +static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices) +{ + memset(graph3->first_edge,0xff,nvertices*sizeof(cmph_uint32)); + memset(graph3->vert_degree,0,nvertices); + graph3->nedges=0; +}; +static void bdz_free_graph3(bdz_graph3_t *graph3) +{ + free(graph3->edges); + free(graph3->first_edge); + free(graph3->vert_degree); +}; + +static void 
bdz_partial_free_graph3(bdz_graph3_t *graph3) +{ + free(graph3->first_edge); + free(graph3->vert_degree); + graph3->first_edge = NULL; + graph3->vert_degree = NULL; +}; + +static void bdz_add_edge(bdz_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2) +{ + graph3->edges[graph3->nedges].vertices[0]=v0; + graph3->edges[graph3->nedges].vertices[1]=v1; + graph3->edges[graph3->nedges].vertices[2]=v2; + graph3->edges[graph3->nedges].next_edges[0]=graph3->first_edge[v0]; + graph3->edges[graph3->nedges].next_edges[1]=graph3->first_edge[v1]; + graph3->edges[graph3->nedges].next_edges[2]=graph3->first_edge[v2]; + graph3->first_edge[v0]=graph3->first_edge[v1]=graph3->first_edge[v2]=graph3->nedges; + graph3->vert_degree[v0]++; + graph3->vert_degree[v1]++; + graph3->vert_degree[v2]++; + graph3->nedges++; +}; + +static void bdz_dump_graph(bdz_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices) +{ + int i; + for(i=0;iedges[i].vertices[0], + graph3->edges[i].vertices[1],graph3->edges[i].vertices[2]); + printf(" nexts %d %d %d",graph3->edges[i].next_edges[0], + graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]); + }; + + for(i=0;ifirst_edge[i]); + + }; +}; + +static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge) +{ + cmph_uint32 i,j=0,vert,edge1,edge2; + for(i=0;i<3;i++){ + vert=graph3->edges[curr_edge].vertices[i]; + edge1=graph3->first_edge[vert]; + edge2=NULL_EDGE; + while(edge1!=curr_edge&&edge1!=NULL_EDGE){ + edge2=edge1; + if(graph3->edges[edge1].vertices[0]==vert){ + j=0; + } else if(graph3->edges[edge1].vertices[1]==vert){ + j=1; + } else + j=2; + edge1=graph3->edges[edge1].next_edges[j]; + }; + if(edge1==NULL_EDGE){ + printf("\nerror remove edge %d dump graph",curr_edge); + bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4); + exit(-1); + }; + + if(edge2!=NULL_EDGE){ + graph3->edges[edge2].next_edges[j] = + graph3->edges[edge1].next_edges[i]; + } else + graph3->first_edge[vert]= + 
graph3->edges[edge1].next_edges[i]; + graph3->vert_degree[vert]--; + }; + +}; + +static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3) +{ + cmph_uint32 i,v0,v1,v2; + cmph_uint32 queue_head=0,queue_tail=0; + cmph_uint32 curr_edge; + cmph_uint32 tmp_edge; + cmph_uint8 * marked_edge =malloc((nedges >> 3) + 1); + memset(marked_edge, 0, (nedges >> 3) + 1); + + for(i=0;iedges[i].vertices[0]; + v1=graph3->edges[i].vertices[1]; + v2=graph3->edges[i].vertices[2]; + if(graph3->vert_degree[v0]==1 || + graph3->vert_degree[v1]==1 || + graph3->vert_degree[v2]==1){ + if(!GETBIT(marked_edge,i)) { + queue[queue_head++]=i; + SETBIT(marked_edge,i); + } + }; + }; + while(queue_tail!=queue_head){ + curr_edge=queue[queue_tail++]; + bdz_remove_edge(graph3,curr_edge); + v0=graph3->edges[curr_edge].vertices[0]; + v1=graph3->edges[curr_edge].vertices[1]; + v2=graph3->edges[curr_edge].vertices[2]; + if(graph3->vert_degree[v0]==1 ) { + tmp_edge=graph3->first_edge[v0]; + if(!GETBIT(marked_edge,tmp_edge)) { + queue[queue_head++]=tmp_edge; + SETBIT(marked_edge,tmp_edge); + }; + + }; + if(graph3->vert_degree[v1]==1) { + tmp_edge=graph3->first_edge[v1]; + if(!GETBIT(marked_edge,tmp_edge)){ + queue[queue_head++]=tmp_edge; + SETBIT(marked_edge,tmp_edge); + }; + + }; + if(graph3->vert_degree[v2]==1){ + tmp_edge=graph3->first_edge[v2]; + if(!GETBIT(marked_edge,tmp_edge)){ + queue[queue_head++]=tmp_edge; + SETBIT(marked_edge,tmp_edge); + }; + }; + }; + free(marked_edge); + return queue_head-nedges;/* returns 0 if successful otherwies return negative number*/ +}; + +static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue); +static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue); +static void ranking(bdz_config_data_t *bdz); +static cmph_uint32 rank(bdz_data_t *bdz, cmph_uint32 vertex); + +bdz_config_data_t *bdz_config_new() +{ + bdz_config_data_t *bdz; + bdz = (bdz_config_data_t 
*)malloc(sizeof(bdz_config_data_t)); + assert(bdz); + memset(bdz, 0, sizeof(bdz_config_data_t)); + bdz->hashfunc = CMPH_HASH_JENKINS; + bdz->g = NULL; + bdz->hl = NULL; + bdz->k = 0; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$ + bdz->b = 7; // number of bits of k + bdz->ranktablesize = 0; //number of entries in ranktable, $n/k +1$ + bdz->ranktable = NULL; // rank table + return bdz; +} + +void bdz_config_destroy(cmph_config_t *mph) +{ + bdz_config_data_t *data = (bdz_config_data_t *)mph->data; + DEBUGP("Destroying algorithm dependent data\n"); + free(data); +} + +void bdz_config_set_b(cmph_config_t *mph, cmph_uint8 b) +{ + bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data; + if (b <= 2) b = 7; // validating restrictions over parameter b. + bdz->b = b; + DEBUGP("b: %u\n", b); + +} + +void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) +{ + bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data; + CMPH_HASH *hashptr = hashfuncs; + cmph_uint32 i = 0; + while(*hashptr != CMPH_HASH_COUNT) + { + if (i >= 1) break; //bdz only uses one linear hash function + bdz->hashfunc = *hashptr; + ++i, ++hashptr; + } +} + +cmph_t *bdz_new(cmph_config_t *mph, float c) +{ + cmph_t *mphf = NULL; + bdz_data_t *bdzf = NULL; + cmph_uint32 iterations; + bdz_queue_t edges; + bdz_graph3_t graph3; + bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data; + if (c == 0) c = 1.25; // validating restrictions over parameter c. 
+ DEBUGP("c: %f\n", c); + bdz->m = mph->key_source->nkeys; + bdz->r = ceil((c * mph->key_source->nkeys)/3); + bdz->n = 3*bdz->r; + + bdz->k = (1 << bdz->b); + DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k); + + bdz->ranktablesize = bdz->n/bdz->k + 2; + DEBUGP("ranktablesize: %u\n", bdz->ranktablesize); + + + bdz_alloc_graph3(&graph3, bdz->m, bdz->n); + bdz_alloc_queue(&edges,bdz->m); + DEBUGP("Created hypergraph\n"); + + DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c); + + // Mapping step + iterations = 1000; + if (mph->verbosity) + { + fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n); + } + while(1) + { + int ok; + DEBUGP("linear hash function \n"); + bdz->hl = hash_state_new(bdz->hashfunc, 15); + + ok = bdz_mapping(mph, &graph3, edges); + //ok = 0; + if (!ok) + { + --iterations; + hash_state_destroy(bdz->hl); + bdz->hl = NULL; + DEBUGP("%u iterations remaining\n", iterations); + if (mph->verbosity) + { + fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations); + } + if (iterations == 0) break; + } + else break; + } + + if (iterations == 0) + { + bdz_free_queue(&edges); + bdz_free_graph3(&graph3); + return NULL; + } + bdz_partial_free_graph3(&graph3); + // Assigning step + if (mph->verbosity) + { + fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n); + } + assigning(bdz, &graph3, edges); + + bdz_free_queue(&edges); + bdz_free_graph3(&graph3); + if (mph->verbosity) + { + fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n); + } + ranking(bdz); + + mphf = (cmph_t *)malloc(sizeof(cmph_t)); + mphf->algo = mph->algo; + bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t)); + bdzf->g = bdz->g; + bdz->g = NULL; //transfer memory ownership + bdzf->hl = bdz->hl; + bdz->hl = NULL; //transfer memory ownership + bdzf->ranktable = bdz->ranktable; + 
bdz->ranktable = NULL; //transfer memory ownership + bdzf->ranktablesize = bdz->ranktablesize; + bdzf->k = bdz->k; + bdzf->b = bdz->b; + bdzf->n = bdz->n; + bdzf->m = bdz->m; + bdzf->r = bdz->r; + mphf->data = bdzf; + mphf->size = bdz->m; + + DEBUGP("Successfully generated minimal perfect hash\n"); + if (mph->verbosity) + { + fprintf(stderr, "Successfully generated minimal perfect hash function\n"); + } + + return mphf; +} + + +static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue) +{ + cmph_uint32 e; + int cycles = 0; + cmph_uint32 hl[3]; + bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data; + bdz_init_graph3(graph3, bdz->m, bdz->n); + mph->key_source->rewind(mph->key_source->data); + for (e = 0; e < mph->key_source->nkeys; ++e) + { + cmph_uint32 h0, h1, h2; + cmph_uint32 keylen; + char *key = NULL; + mph->key_source->read(mph->key_source->data, &key, &keylen); + hash_vector(bdz->hl, key, keylen,hl); + h0 = hl[0] % bdz->r; + h1 = hl[1] % bdz->r + bdz->r; + h2 = hl[2] % bdz->r + (bdz->r << 1); + mph->key_source->dispose(mph->key_source->data, key, keylen); + bdz_add_edge(graph3,h0,h1,h2); + } + cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3); + return (cycles == 0); +} + +static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue) +{ + cmph_uint32 i; + cmph_uint32 nedges=graph3->nedges; + cmph_uint32 curr_edge; + cmph_uint32 v0,v1,v2; + cmph_uint8 * marked_vertices =malloc((bdz->n >> 3) + 1); + bdz->g = (cmph_uint8 *)calloc((bdz->n >> 2)+1, sizeof(cmph_uint8)); + memset(marked_vertices, 0, (bdz->n >> 3) + 1); + memset(bdz->g, 0xff, (bdz->n >> 2) + 1); + + for(i=nedges-1;i+1>=1;i--){ + curr_edge=queue[i]; + v0=graph3->edges[curr_edge].vertices[0]; + v1=graph3->edges[curr_edge].vertices[1]; + v2=graph3->edges[curr_edge].vertices[2]; + DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2)); + if(!GETBIT(marked_vertices, v0)){ + 
if(!GETBIT(marked_vertices,v1)) + { + SETVALUE(bdz->g, v1, UNASSIGNED); + SETBIT(marked_vertices, v1); + } + if(!GETBIT(marked_vertices,v2)) + { + SETVALUE(bdz->g, v2, UNASSIGNED); + SETBIT(marked_vertices, v2); + } + SETVALUE(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3); + SETBIT(marked_vertices, v0); + } else if(!GETBIT(marked_vertices, v1)) { + if(!GETBIT(marked_vertices, v2)) + { + SETVALUE(bdz->g, v2, UNASSIGNED); + SETBIT(marked_vertices, v2); + } + SETVALUE(bdz->g, v1, (7-(GETVALUE(bdz->g, v0)+GETVALUE(bdz->g, v2)))%3); + SETBIT(marked_vertices, v1); + }else { + SETVALUE(bdz->g, v2, (8-(GETVALUE(bdz->g,v0)+GETVALUE(bdz->g, v1)))%3); + SETBIT(marked_vertices, v2); + } + DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2)); + }; + free(marked_vertices); +} + + +static void ranking(bdz_config_data_t *bdz) +{ + cmph_uint32 i, j, offset = 0, count = 0, size = (bdz->k >> 2), nbytes_total = (bdz->n >> 2)+1, nbytes; + bdz->ranktable = (cmph_uint32 *)calloc(bdz->ranktablesize, sizeof(cmph_uint32)); + // ranktable computation + bdz->ranktable[0] = 0; + i = 1; + while(1) + { + nbytes = size < nbytes_total? 
size : nbytes_total; + for(j = 0; j < nbytes; j++) + { + count += bdz_lookup_table[*(bdz->g + offset + j)]; + } + if(i == bdz->ranktablesize) fprintf(stderr, "i:%u == bdz->ranktablesize:%u\n", i, bdz->ranktablesize); + assert(i < bdz->ranktablesize); + bdz->ranktable[i] = count; + offset += nbytes; + if(size >= nbytes_total) break; + nbytes_total -= size; + i++; + } +} + + +int bdz_dump(cmph_t *mphf, FILE *fd) +{ + char *buf = NULL; + cmph_uint32 buflen; + bdz_data_t *data = (bdz_data_t *)mphf->data; + __cmph_dump(mphf, fd); + + hash_state_dump(data->hl, &buf, &buflen); + DEBUGP("Dumping hash state with %u bytes to disk\n", buflen); + fwrite(&buflen, sizeof(cmph_uint32), 1, fd); + fwrite(buf, buflen, 1, fd); + free(buf); + + fwrite(&(data->n), sizeof(cmph_uint32), 1, fd); + fwrite(&(data->m), sizeof(cmph_uint32), 1, fd); + fwrite(&(data->r), sizeof(cmph_uint32), 1, fd); + + fwrite(data->g, sizeof(cmph_uint8)*((data->n >> 2) +1), 1, fd); + + fwrite(&(data->k), sizeof(cmph_uint32), 1, fd); + fwrite(&(data->b), sizeof(cmph_uint8), 1, fd); + fwrite(&(data->ranktablesize), sizeof(cmph_uint32), 1, fd); + + fwrite(data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize), 1, fd); + #ifdef DEBUG + cmph_uint32 i; + fprintf(stderr, "G: "); + for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i)); + fprintf(stderr, "\n"); + #endif + return 1; +} + +void bdz_load(FILE *f, cmph_t *mphf) +{ + char *buf = NULL; + cmph_uint32 buflen; + bdz_data_t *bdz = (bdz_data_t *)malloc(sizeof(bdz_data_t)); + + DEBUGP("Loading bdz mphf\n"); + mphf->data = bdz; + + fread(&buflen, sizeof(cmph_uint32), 1, f); + DEBUGP("Hash state has %u bytes\n", buflen); + buf = (char *)malloc(buflen); + fread(buf, buflen, 1, f); + bdz->hl = hash_state_load(buf, buflen); + free(buf); + + + DEBUGP("Reading m and n\n"); + fread(&(bdz->n), sizeof(cmph_uint32), 1, f); + fread(&(bdz->m), sizeof(cmph_uint32), 1, f); + fread(&(bdz->r), sizeof(cmph_uint32), 1, f); + + bdz->g = (cmph_uint8 
*)calloc((bdz->n >> 2) + 1, sizeof(cmph_uint8)); + fread(bdz->g, ((bdz->n >> 2) + 1)*sizeof(cmph_uint8), 1, f); + + fread(&(bdz->k), sizeof(cmph_uint32), 1, f); + fread(&(bdz->b), sizeof(cmph_uint8), 1, f); + fread(&(bdz->ranktablesize), sizeof(cmph_uint32), 1, f); + + bdz->ranktable = (cmph_uint32 *)calloc(bdz->ranktablesize, sizeof(cmph_uint32)); + fread(bdz->ranktable, sizeof(cmph_uint32)*(bdz->ranktablesize), 1, f); + + #ifdef DEBUG + fprintf(stderr, "G: "); + for (i = 0; i < bdz->n; ++i) fprintf(stderr, "%u ", GETVALUE(bdz->g,i)); + fprintf(stderr, "\n"); + #endif + return; +} + + +cmph_uint32 bdz_search_ph(cmph_t *mphf, const char *key, cmph_uint32 keylen) +{ + bdz_data_t *bdz = mphf->data; + cmph_uint32 hl[3]; + hash_vector(bdz->hl, key, keylen, hl); + cmph_uint32 vertex; + hl[0] = hl[0] % bdz->r; + hl[1] = hl[1] % bdz->r + bdz->r; + hl[2] = hl[2] % bdz->r + (bdz->r << 1); + vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3]; + return vertex; +} + +static inline cmph_uint32 rank(bdz_data_t *bdz, cmph_uint32 vertex) +{ + cmph_uint32 index = vertex >> bdz->b; + cmph_uint32 base_rank = bdz->ranktable[index]; + cmph_uint32 beg_idx_v = index << bdz->b; + cmph_uint32 beg_idx_b = beg_idx_v >> 2; + cmph_uint32 end_idx_b = vertex >> 2; + while(beg_idx_b < end_idx_b) + { + base_rank += bdz_lookup_table[*(bdz->g + beg_idx_b++)]; + + } + beg_idx_v = beg_idx_b << 2; + while(beg_idx_v < vertex) + { + if(GETVALUE(bdz->g, beg_idx_v) != UNASSIGNED) base_rank++; + beg_idx_v++; + } + + return base_rank; +} + +cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) +{ + bdz_data_t *bdz = mphf->data; + cmph_uint32 hl[3]; + hash_vector(bdz->hl, key, keylen, hl); + cmph_uint32 vertex; + hl[0] = hl[0] % bdz->r; + hl[1] = hl[1] % bdz->r + bdz->r; + hl[2] = hl[2] % bdz->r + (bdz->r << 1); + vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3]; + return rank(bdz, vertex); +} + + 
+void bdz_destroy(cmph_t *mphf) +{ + bdz_data_t *data = (bdz_data_t *)mphf->data; + free(data->g); + hash_state_destroy(data->hl); + free(data->ranktable); + free(data); + free(mphf); +} diff --git a/src/bdz.h b/src/bdz.h new file mode 100755 index 0000000..0b15e01 --- /dev/null +++ b/src/bdz.h @@ -0,0 +1,19 @@ +#ifndef __CMPH_BDZ_H__ +#define __CMPH_BDZ_H__ + +#include "cmph.h" + +typedef struct __bdz_data_t bdz_data_t; +typedef struct __bdz_config_data_t bdz_config_data_t; + +bdz_config_data_t *bdz_config_new(); +void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); +void bdz_config_destroy(cmph_config_t *mph); +void bdz_config_set_b(cmph_config_t *mph, cmph_uint8 b); +cmph_t *bdz_new(cmph_config_t *mph, float c); + +void bdz_load(FILE *f, cmph_t *mphf); +int bdz_dump(cmph_t *mphf, FILE *f); +void bdz_destroy(cmph_t *mphf); +cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); +#endif diff --git a/src/bdz_gen_lookup_table.c b/src/bdz_gen_lookup_table.c new file mode 100755 index 0000000..b8f6606 --- /dev/null +++ b/src/bdz_gen_lookup_table.c @@ -0,0 +1,33 @@ +#include +#include +#include +void help(char * prname) +{ + fprintf(stderr, "USE: %s \n", prname); + exit(1); +} + +int main(int argc, char ** argv) +{ + if(argc != 3) help(argv[0]); + int n = atoi(argv[1]); + int wordsize = (atoi(argv[2]) >> 1); + int i, j, n_assigned; + for(i = 0; i < n; i++) + { + int num = i; + n_assigned = 0; + for(j = 0; j < wordsize; j++) + { + if ((num & 0x0003) != 3) + { + n_assigned++; + //fprintf(stderr, "num:%d\n", num); + } + num = num >> 2; + } + if(i%16 == 0) fprintf(stderr, "\n"); + fprintf(stderr, "%d, ", n_assigned); + } + fprintf(stderr, "\n"); +} diff --git a/src/bdz_structs.h b/src/bdz_structs.h new file mode 100755 index 0000000..ba7dc3c --- /dev/null +++ b/src/bdz_structs.h @@ -0,0 +1,36 @@ +#ifndef __CMPH_BDZ_STRUCTS_H__ +#define __CMPH_BDZ_STRUCTS_H__ + +#include "hash_state.h" + +struct __bdz_data_t +{ + cmph_uint32 m; 
//edges (words) count + cmph_uint32 n; //vertex count + cmph_uint32 r; //partition vertex count + cmph_uint8 *g; + hash_state_t *hl; // linear hashing + + cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$ + cmph_uint8 b; // number of bits of k + cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$ + cmph_uint32 *ranktable; // rank table +}; + + +struct __bdz_config_data_t +{ + cmph_uint32 m; //edges (words) count + cmph_uint32 n; //vertex count + cmph_uint32 r; //partition vertex count + cmph_uint8 *g; + hash_state_t *hl; // linear hashing + + cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$ + cmph_uint8 b; // number of bits of k + cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$ + cmph_uint32 *ranktable; // rank table + CMPH_HASH hashfunc; +}; + +#endif diff --git a/src/bitbool.c b/src/bitbool.c index 07279e4..a97b1ae 100644 --- a/src/bitbool.c +++ b/src/bitbool.c @@ -1,2 +1,3 @@ #include "bitbool.h" const cmph_uint8 bitmask[] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; +const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; diff --git a/src/bitbool.h b/src/bitbool.h index 5428919..ec0250a 100644 --- a/src/bitbool.h +++ b/src/bitbool.h @@ -2,8 +2,17 @@ #define _CMPH_BITBOOL_H__ #include "cmph_types.h" extern const cmph_uint8 bitmask[]; -#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) -#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) -#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) + +#define GETBIT(array, i) ((array[i >> 3] & bitmask[i & 0x00000007]) >> (i & 0x00000007)) +#define SETBIT(array, i) (array[i >> 3] |= bitmask[i & 0x00000007]) +#define UNSETBIT(array, i) (array[i >> 3] &= (~(bitmask[i & 0x00000007]))) + +//#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) +//#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) +//#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) + 
+extern const cmph_uint8 valuemask[]; +#define SETVALUE(array, i, v) (array[i >> 2] &= ((v << ((i & 0x00000003) << 1)) | valuemask[i & 0x00000003])) +#define GETVALUE(array, i) ((array[i >> 2] >> ((i & 0x00000003) << 1)) & 0x00000003) #endif diff --git a/src/bmz.c b/src/bmz.c index 7371c0e..ba972ff 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -226,7 +226,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3 vqueue_destroy(q); return 1; // restart mapping step. } - if (GETBIT(used_edges, next_g + bmz->g[lav])) + if (GETBIT(used_edges, (next_g + bmz->g[lav]))) { collision = 1; break; @@ -241,7 +241,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3 { if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav)) { - SETBIT(used_edges,next_g + bmz->g[lav]); + SETBIT(used_edges,(next_g + bmz->g[lav])); if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav]; } } @@ -306,7 +306,7 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, free(unused_g_values); return 1; // restart mapping step. } - if (GETBIT(used_edges, next_g + bmz->g[lav])) + if (GETBIT(used_edges, (next_g + bmz->g[lav]))) { collision = 1; break; @@ -334,7 +334,7 @@ static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, { if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav)) { - SETBIT(used_edges,next_g + bmz->g[lav]); + SETBIT(used_edges,(next_g + bmz->g[lav])); if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav]; } } diff --git a/src/bmz8.c b/src/bmz8.c index 032a3bc..f18322a 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -233,7 +233,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui vqueue_destroy(q); return 1; // restart mapping step. 
} - if (GETBIT(used_edges, next_g + bmz8->g[lav])) + if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) { collision = 1; break; @@ -248,7 +248,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes(bmz8_config_data_t *bmz8, cmph_ui { if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav)) { - SETBIT(used_edges,next_g + bmz8->g[lav]); + SETBIT(used_edges,(next_g + bmz8->g[lav])); if(next_g + bmz8->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz8->g[lav]; } } @@ -313,7 +313,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz free(unused_g_values); return 1; // restart mapping step. } - if (GETBIT(used_edges, next_g + bmz8->g[lav])) + if (GETBIT(used_edges, (next_g + bmz8->g[lav]))) { collision = 1; break; @@ -342,7 +342,7 @@ static cmph_uint8 bmz8_traverse_critical_nodes_heuristic(bmz8_config_data_t *bmz { if (graph_node_is_critical(bmz8->graph, lav) && GETBIT(visited, lav)) { - SETBIT(used_edges,next_g + bmz8->g[lav]); + SETBIT(used_edges,(next_g + bmz8->g[lav])); if(next_g + bmz8->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz8->g[lav]; } } diff --git a/src/cmph b/src/cmph deleted file mode 100755 index 1d01e49..0000000 --- a/src/cmph +++ /dev/null @@ -1,131 +0,0 @@ -#! /bin/bash - -# cmph - temporary wrapper script for .libs/cmph -# Generated by ltmain.sh - GNU libtool 1.5.24 Debian 1.5.24-1ubuntu1 (1.1220.2.456 2007/06/24 02:25:32) -# -# The cmph program cannot be directly executed until all the libtool -# libraries that it depends on are installed. -# -# This wrapper script should never be moved out of the build directory. -# If it is, it will not operate correctly. - -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -Xsed='/bin/sed -e 1s/^X//' -sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g' - -# Be Bourne compatible (taken from Autoconf:_AS_BOURNE_COMPATIBLE). 
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then - emulate sh - NULLCMD=: - # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which - # is contrary to our usage. Disable this feature. - alias -g '${1+"$@"}'='"$@"' - setopt NO_GLOB_SUBST -else - case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac -fi -BIN_SH=xpg4; export BIN_SH # for Tru64 -DUALCASE=1; export DUALCASE # for MKS sh - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH - -relink_command="(cd /home/fbotelho/doutorado/algoritmos/cmph/src; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=\"/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games\"; export PATH; gcc -Wall -Werror -o \$progdir/\$file main.o wingetopt.o -lm ./.libs/libcmph.so -Wl,--rpath -Wl,/home/fbotelho/doutorado/algoritmos/cmph/src/.libs ) " - -# This environment variable determines our operation mode. -if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then - # install mode needs the following variable: - notinst_deplibs=' libcmph.la' -else - # When we are sourced in execute mode, $file and $echo are already set. - if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then - echo="echo" - file="$0" - # Make sure echo works. - if test "X$1" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. - shift - elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then - # Yippee, $echo works! 
- : - else - # Restart under the correct shell, and then maybe $echo will work. - exec /bin/bash "$0" --no-reexec ${1+"$@"} - fi - fi - - # Find the directory that this script lives in. - thisdir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "x$thisdir" = "x$file" && thisdir=. - - # Follow symbolic links until we get to the real thisdir. - file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'` - while test -n "$file"; do - destdir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - - # If there was a directory component, then change thisdir. - if test "x$destdir" != "x$file"; then - case "$destdir" in - [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;; - *) thisdir="$thisdir/$destdir" ;; - esac - fi - - file=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'` - done - - # Try to get the absolute directory name. - absdir=`cd "$thisdir" && pwd` - test -n "$absdir" && thisdir="$absdir" - - program=lt-'cmph' - progdir="$thisdir/.libs" - - if test ! -f "$progdir/$program" || \ - { file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \ - test "X$file" != "X$progdir/$program"; }; then - - file="$$-$program" - - if test ! -d "$progdir"; then - mkdir "$progdir" - else - rm -f "$progdir/$file" - fi - - # relink executable if necessary - if test -n "$relink_command"; then - if relink_command_output=`eval $relink_command 2>&1`; then : - else - echo "$relink_command_output" >&2 - rm -f "$progdir/$file" - exit 1 - fi - fi - - mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null || - { rm -f "$progdir/$program"; - mv -f "$progdir/$file" "$progdir/$program"; } - rm -f "$progdir/$file" - fi - - if test -f "$progdir/$program"; then - if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then - # Run the actual program with our arguments. - - exec "$progdir/$program" ${1+"$@"} - - $echo "$0: cannot exec $program $*" - exit 1 - fi - else - # The program doesn't exist. 
- $echo "$0: error: \`$progdir/$program' does not exist" 1>&2 - $echo "This script is just a wrapper for $program." 1>&2 - echo "See the libtool documentation for more information." 1>&2 - exit 1 - fi -fi diff --git a/src/cmph.c b/src/cmph.c index 6d753d4..a4bdbd4 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -1,10 +1,11 @@ #include "cmph.h" #include "cmph_structs.h" #include "chm.h" -#include "bmz.h" +#include "bmz.h" /* included -- Fabiano */ #include "bmz8.h" /* included -- Fabiano */ #include "brz.h" /* included -- Fabiano */ #include "fch.h" /* included -- Fabiano */ +#include "bdz.h" /* included -- Fabiano */ #include #include @@ -12,7 +13,7 @@ //#define DEBUG #include "debug.h" -const char *cmph_names[] = { "bmz", "bmz8", "chm", "brz", "fch", NULL }; /* included -- Fabiano */ +const char *cmph_names[] = { "bmz", "bmz8", "chm", "brz", "fch", "bdz", NULL }; /* included -- Fabiano */ typedef struct { @@ -223,6 +224,9 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) case CMPH_FCH: fch_config_destroy(mph); break; + case CMPH_BDZ: + bdz_config_destroy(mph); + break; default: assert(0); } @@ -243,6 +247,9 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) case CMPH_FCH: mph->data = fch_config_new(); break; + case CMPH_BDZ: + mph->data = bdz_config_new(); + break; default: assert(0); } @@ -273,6 +280,10 @@ void cmph_config_set_b(cmph_config_t *mph, cmph_uint8 b) { brz_config_set_b(mph, b); } + else if (mph->algo == CMPH_BDZ) + { + bdz_config_set_b(mph, b); + } } void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability) @@ -295,13 +306,16 @@ void cmph_config_destroy(cmph_config_t *mph) bmz_config_destroy(mph); break; case CMPH_BMZ8: /* included -- Fabiano */ - bmz8_config_destroy(mph); + bmz8_config_destroy(mph); break; case CMPH_BRZ: /* included -- Fabiano */ - brz_config_destroy(mph); + brz_config_destroy(mph); break; case CMPH_FCH: /* included -- Fabiano */ - fch_config_destroy(mph); + 
fch_config_destroy(mph); + break; + case CMPH_BDZ: /* included -- Fabiano */ + bdz_config_destroy(mph); break; default: assert(0); @@ -333,6 +347,9 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) case CMPH_FCH: /* included -- Fabiano */ fch_config_set_hashfuncs(mph, hashfuncs); break; + case CMPH_BDZ: /* included -- Fabiano */ + bdz_config_set_hashfuncs(mph, hashfuncs); + break; default: break; } @@ -374,6 +391,10 @@ cmph_t *cmph_new(cmph_config_t *mph) DEBUGP("Creating fch hash\n"); mphf = fch_new(mph, c); break; + case CMPH_BDZ: /* included -- Fabiano */ + DEBUGP("Creating bdz hash\n"); + mphf = bdz_new(mph, c); + break; default: assert(0); } @@ -394,6 +415,8 @@ int cmph_dump(cmph_t *mphf, FILE *f) return brz_dump(mphf, f); case CMPH_FCH: /* included -- Fabiano */ return fch_dump(mphf, f); + case CMPH_BDZ: /* included -- Fabiano */ + return bdz_dump(mphf, f); default: assert(0); } @@ -429,6 +452,10 @@ cmph_t *cmph_load(FILE *f) DEBUGP("Loading fch algorithm dependent parts\n"); fch_load(f, mphf); break; + case CMPH_BDZ: /* included -- Fabiano */ + DEBUGP("Loading bdz algorithm dependent parts\n"); + bdz_load(f, mphf); + break; default: assert(0); } @@ -456,6 +483,9 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) case CMPH_FCH: /* included -- Fabiano */ DEBUGP("fch algorithm search\n"); return fch_search(mphf, key, keylen); + case CMPH_BDZ: /* included -- Fabiano */ + DEBUGP("bdz algorithm search\n"); + return bdz_search(mphf, key, keylen); default: assert(0); } @@ -487,6 +517,9 @@ void cmph_destroy(cmph_t *mphf) case CMPH_FCH: /* included -- Fabiano */ fch_destroy(mphf); return; + case CMPH_BDZ: /* included -- Fabiano */ + bdz_destroy(mphf); + return; default: assert(0); } diff --git a/src/cmph_types.h b/src/cmph_types.h index 941059b..1359adb 100644 --- a/src/cmph_types.h +++ b/src/cmph_types.h @@ -6,10 +6,9 @@ typedef unsigned short cmph_uint16; typedef unsigned int cmph_uint32; typedef float 
cmph_float32; -typedef enum { CMPH_HASH_DJB2, CMPH_HASH_FNV, CMPH_HASH_JENKINS, - CMPH_HASH_SDBM, CMPH_HASH_COUNT } CMPH_HASH; +typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH; extern const char *cmph_hash_names[]; -typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH, CMPH_COUNT } CMPH_ALGO; /* included -- Fabiano */ +typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH, CMPH_BDZ, CMPH_COUNT } CMPH_ALGO; /* included -- Fabiano */ extern const char *cmph_names[]; #endif diff --git a/src/hash.c b/src/hash.c index 925043a..7a754d8 100644 --- a/src/hash.c +++ b/src/hash.c @@ -7,7 +7,7 @@ //#define DEBUG #include "debug.h" -const char *cmph_hash_names[] = { "djb2", "fnv", "jenkins", "sdbm", NULL }; +const char *cmph_hash_names[] = { "jenkins", NULL }; hash_state_t *hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize) { @@ -19,15 +19,6 @@ hash_state_t *hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize) state = (hash_state_t *)jenkins_state_new(hashsize); DEBUGP("Jenkins function created\n"); break; - case CMPH_HASH_DJB2: - state = (hash_state_t *)djb2_state_new(); - break; - case CMPH_HASH_SDBM: - state = (hash_state_t *)sdbm_state_new(); - break; - case CMPH_HASH_FNV: - state = (hash_state_t *)fnv_state_new(); - break; default: assert(0); } @@ -40,12 +31,6 @@ cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen) { case CMPH_HASH_JENKINS: return jenkins_hash((jenkins_state_t *)state, key, keylen); - case CMPH_HASH_DJB2: - return djb2_hash((djb2_state_t *)state, key, keylen); - case CMPH_HASH_SDBM: - return sdbm_hash((sdbm_state_t *)state, key, keylen); - case CMPH_HASH_FNV: - return fnv_hash((fnv_state_t *)state, key, keylen); default: assert(0); } @@ -53,6 +38,19 @@ cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen) return 0; } +void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes) +{ + switch (state->hashfunc) + { + case 
CMPH_HASH_JENKINS: + jenkins_hash_vector((jenkins_state_t *)state, key, keylen, hashes); + break; + default: + assert(0); + } +} + + void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen) { char *algobuf; @@ -62,18 +60,6 @@ void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen) jenkins_state_dump((jenkins_state_t *)state, &algobuf, buflen); if (*buflen == UINT_MAX) return; break; - case CMPH_HASH_DJB2: - djb2_state_dump((djb2_state_t *)state, &algobuf, buflen); - if (*buflen == UINT_MAX) return; - break; - case CMPH_HASH_SDBM: - sdbm_state_dump((sdbm_state_t *)state, &algobuf, buflen); - if (*buflen == UINT_MAX) return; - break; - case CMPH_HASH_FNV: - fnv_state_dump((fnv_state_t *)state, &algobuf, buflen); - if (*buflen == UINT_MAX) return; - break; default: assert(0); } @@ -94,15 +80,6 @@ hash_state_t * hash_state_copy(hash_state_t *src_state) case CMPH_HASH_JENKINS: dest_state = (hash_state_t *)jenkins_state_copy((jenkins_state_t *)src_state); break; - case CMPH_HASH_DJB2: - dest_state = (hash_state_t *)djb2_state_copy((djb2_state_t *)src_state); - break; - case CMPH_HASH_SDBM: - dest_state = (hash_state_t *)sdbm_state_copy((sdbm_state_t *)src_state); - break; - case CMPH_HASH_FNV: - dest_state = (hash_state_t *)fnv_state_copy((fnv_state_t *)src_state); - break; default: assert(0); } @@ -129,12 +106,6 @@ hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen) { case CMPH_HASH_JENKINS: return (hash_state_t *)jenkins_state_load(buf + offset, buflen - offset); - case CMPH_HASH_DJB2: - return (hash_state_t *)djb2_state_load(buf + offset, buflen - offset); - case CMPH_HASH_SDBM: - return (hash_state_t *)sdbm_state_load(buf + offset, buflen - offset); - case CMPH_HASH_FNV: - return (hash_state_t *)fnv_state_load(buf + offset, buflen - offset); default: return NULL; } @@ -147,15 +118,6 @@ void hash_state_destroy(hash_state_t *state) case CMPH_HASH_JENKINS: jenkins_state_destroy((jenkins_state_t *)state); break; - case 
CMPH_HASH_DJB2: - djb2_state_destroy((djb2_state_t *)state); - break; - case CMPH_HASH_SDBM: - sdbm_state_destroy((sdbm_state_t *)state); - break; - case CMPH_HASH_FNV: - fnv_state_destroy((fnv_state_t *)state); - break; default: assert(0); } diff --git a/src/hash.h b/src/hash.h index 2735a22..092fe1c 100644 --- a/src/hash.h +++ b/src/hash.h @@ -6,10 +6,29 @@ typedef union __hash_state_t hash_state_t; hash_state_t *hash_state_new(CMPH_HASH, cmph_uint32 hashsize); + +/** \fn cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen); + * \param state is a pointer to a hash_state_t structure + * \param key is a pointer to a key + * \param keylen is the key length + * \return an integer that represents a hash value of 32 bits. + */ cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen); + +/** \fn void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes); + * \param state is a pointer to a hash_state_t structure + * \param key is a pointer to a key + * \param keylen is the key length + * \param hashes is a pointer to memory large enough to hold three 32-bit integers.
+ */ +void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes); + void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen); + hash_state_t * hash_state_copy(hash_state_t *src_state); + hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen); + void hash_state_destroy(hash_state_t *state); #endif diff --git a/src/hash_state.h b/src/hash_state.h index 67dcd77..1b567dc 100644 --- a/src/hash_state.h +++ b/src/hash_state.h @@ -3,16 +3,10 @@ #include "hash.h" #include "jenkins_hash.h" -#include "djb2_hash.h" -#include "sdbm_hash.h" -#include "fnv_hash.h" union __hash_state_t { CMPH_HASH hashfunc; jenkins_state_t jenkins; - djb2_state_t djb2; - sdbm_state_t sdbm; - fnv_state_t fnv; }; #endif diff --git a/src/jenkins_hash.c b/src/jenkins_hash.c index 843571c..934dd0c 100644 --- a/src/jenkins_hash.c +++ b/src/jenkins_hash.c @@ -159,6 +159,59 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl return c; } +void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) +{ + cmph_uint32 len, length; + + /* Set up the internal state */ + length = keylen; + len = length; + hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + hashes[2] = state->seed; /* the previous hash value - seed in our case */ + + /*---------------------------------------- handle most of the key */ + while (len >= 12) + { + hashes[0] += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); + hashes[1] += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24)); + hashes[2] += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24)); + mix(hashes[0],hashes[1],hashes[2]); + k += 12; len -= 12; + } + + /*------------------------------------- handle the last 11 bytes */ + hashes[2] += length; + switch(len) /* all the case statements fall through */ + { + case 11: + 
hashes[2] +=((cmph_uint32)k[10]<<24); + case 10: + hashes[2] +=((cmph_uint32)k[9]<<16); + case 9 : + hashes[2] +=((cmph_uint32)k[8]<<8); + /* the first byte of hashes[2] is reserved for the length */ + case 8 : + hashes[1] +=((cmph_uint32)k[7]<<24); + case 7 : + hashes[1] +=((cmph_uint32)k[6]<<16); + case 6 : + hashes[1] +=((cmph_uint32)k[5]<<8); + case 5 : + hashes[1] +=k[4]; + case 4 : + hashes[0] +=((cmph_uint32)k[3]<<24); + case 3 : + hashes[0] +=((cmph_uint32)k[2]<<16); + case 2 : + hashes[0] +=((cmph_uint32)k[1]<<8); + case 1 : + hashes[0] +=k[0]; + /* case 0: nothing left to add */ + } + + mix(hashes[0],hashes[1],hashes[2]); +} + void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen) { *buflen = sizeof(cmph_uint32); diff --git a/src/jenkins_hash.h b/src/jenkins_hash.h index 17b0cf9..df04627 100644 --- a/src/jenkins_hash.h +++ b/src/jenkins_hash.h @@ -10,7 +10,23 @@ typedef struct __jenkins_state_t } jenkins_state_t; jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table + +/** \fn cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen); + * \param state is a pointer to a jenkins_state_t structure + * \param k is a pointer to a key + * \param keylen is the key length + * \return an integer that represents a hash value of 32 bits. + */ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen); + +/** \fn void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); + * \param state is a pointer to a jenkins_state_t structure + * \param k is a pointer to a key + * \param keylen is the key length + * \param hashes is a pointer to memory large enough to hold three 32-bit integers.
+ */ +void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); + void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen); jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state); jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen); diff --git a/src/libcmph.la b/src/libcmph.la deleted file mode 100644 index 5da6d0a..0000000 --- a/src/libcmph.la +++ /dev/null @@ -1,35 +0,0 @@ -# libcmph.la - a libtool library file -# Generated by ltmain.sh - GNU libtool 1.5.24 Debian 1.5.24-1ubuntu1 (1.1220.2.456 2007/06/24 02:25:32) -# -# Please DO NOT delete this file! -# It is necessary for linking the library. - -# The name that we can dlopen(3). -dlname='libcmph.so.0' - -# Names of this library. -library_names='libcmph.so.0.0.0 libcmph.so.0 libcmph.so' - -# The name of the static archive. -old_library='libcmph.a' - -# Libraries that this one depends upon. -dependency_libs=' -lm' - -# Version information for libcmph. -current=0 -age=0 -revision=0 - -# Is this an already installed library? -installed=no - -# Should we warn about portability when linking against -modules? -shouldnotlink=no - -# Files to dlopen/dlpreopen -dlopen='' -dlpreopen='' - -# Directory that this library needs to be installed in: -libdir='/usr/local/lib'