diff --git a/src/czech.c b/src/czech.c index ac6bec0..16efe3f 100644 --- a/src/czech.c +++ b/src/czech.c @@ -1,4 +1,3 @@ - #include "czech.h" #include "cmph_structs.h" #include "czech_structs.h" @@ -9,12 +8,18 @@ #include #include #include +#include //#define DEBUG #include "debug.h" +static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; +#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) +#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) +#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) + static int czech_gen_edges(mph_t *mph); -static void czech_traverse(czech_mph_data_t *czech, char *visited, uint32 v); +static void czech_traverse(czech_mph_data_t *czech, uint8 *visited, uint32 v); mph_t *czech_mph_new(key_source_t *key_source) { @@ -64,11 +69,11 @@ mphf_t *czech_mph_create(mph_t *mph, float c) czech_mphf_data_t *czechf = NULL; uint32 i; - uint32 iterations = 10; - char *visited = NULL; + uint32 iterations = 20; + uint8 *visited = NULL; czech_mph_data_t *czech = (czech_mph_data_t *)mph->data; czech->m = mph->key_source->nkeys; - czech->n = (uint32)ceil(c * mph->key_source->nkeys); + czech->n = ceil(c * mph->key_source->nkeys); DEBUGP("m (edges): %u n (vertices): %u c: %f\n", czech->m, czech->n, c); czech->graph = graph_new(czech->n, czech->m); DEBUGP("Created graph\n"); @@ -114,21 +119,14 @@ mphf_t *czech_mph_create(mph_t *mph, float c) fprintf(stderr, "Starting assignment step\n"); } DEBUGP("Assignment step\n"); - visited = (char *)malloc(czech->n); - memset(visited, 0, czech->n); + visited = (char *)malloc(czech->n/8 + 1); + memset(visited, 0, czech->n/8 + 1); free(czech->g); czech->g = malloc(czech->n * sizeof(uint32)); assert(czech->g); - if (!czech->g) - { - fprintf(stderr, "out of memory"); - free(visited); - graph_destroy(czech->graph); - return NULL; - } for (i = 0; i < czech->n; ++i) { - if (!visited[i]) + if (!GETBIT(visited,i)) { czech->g[i] = 0; czech_traverse(czech, visited, i); @@ -157,18 +155,18 @@ mphf_t *czech_mph_create(mph_t *mph, float c) return mphf; } -static void czech_traverse(czech_mph_data_t *czech, char *visited, uint32 v) +static void czech_traverse(czech_mph_data_t *czech, uint8 *visited, uint32 v) { graph_iterator_t it = graph_neighbors_it(czech->graph, v); uint32 neighbor = 0; - visited[v] = 1; + SETBIT(visited,v); DEBUGP("Visiting vertex %u\n", v); while((neighbor = graph_next_neighbor(czech->graph, &it)) != GRAPH_NO_NEIGHBOR) { DEBUGP("Visiting neighbor %u\n", neighbor); - if(visited[neighbor]) continue; + if(GETBIT(visited,neighbor)) continue; DEBUGP("Visiting neighbor %u\n", neighbor); DEBUGP("Visiting edge %u->%u with id %u\n", v, neighbor, graph_edge_id(czech->graph, v, neighbor)); czech->g[neighbor] = graph_edge_id(czech->graph, v, neighbor) - czech->g[v]; @@ -197,7 +195,7 @@ static int czech_gen_edges(mph_t *mph) if (h1 == h2) if (++h2 >= czech->n) h2 = 0; if (h1 == h2) { - if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); + if (mph->verbosity) fprintf(stderr, "Self loop for key %e\n", e); mph->key_source->dispose(mph->key_source->data, key, keylen); return 0; } @@ -216,29 +214,39 @@ int czech_mphf_dump(mphf_t *mphf, FILE *fd) { char *buf = NULL; uint32 buflen; - uint32 two = 2; //number of hash functions + uint32 nbuflen; + uint32 i; + uint32 two = htonl(2); //number of hash functions czech_mphf_data_t *data = (czech_mphf_data_t *)mphf->data; + uint32 nn, nm; __mphf_dump(mphf, fd); fwrite(&two, sizeof(uint32), 1, fd); hash_state_dump(data->hashes[0], &buf, &buflen); DEBUGP("Dumping hash state with %u bytes to disk\n", buflen); - fwrite(&buflen, sizeof(uint32), 1, fd); + nbuflen = htonl(buflen); + fwrite(&nbuflen, sizeof(uint32), 1, fd); fwrite(buf, buflen, 1, fd); free(buf); hash_state_dump(data->hashes[1], &buf, &buflen); DEBUGP("Dumping hash state with %u bytes to disk\n", buflen); - fwrite(&buflen, sizeof(uint32), 1, fd); + nbuflen = htonl(buflen); + fwrite(&nbuflen, sizeof(uint32), 1, fd); fwrite(buf, buflen, 1, fd); free(buf); - fwrite(&(data->n), sizeof(uint32), 1, fd); - fwrite(&(data->m), sizeof(uint32), 1, fd); - - fwrite(data->g, sizeof(uint32)*data->n, 1, fd); + nn = htonl(data->n); + fwrite(&nn, sizeof(uint32), 1, fd); + nm = htonl(data->m); + fwrite(&nm, sizeof(uint32), 1, fd); + for (i = 0; i < data->n; ++i) + { + uint32 ng = htonl(data->g[i]); + fwrite(&ng, sizeof(uint32), 1, fd); + } #ifdef DEBUG fprintf(stderr, "G: "); for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]); @@ -250,14 +258,17 @@ int czech_mphf_dump(mphf_t *mphf, FILE *fd) void czech_mphf_load(FILE *f, mphf_t *mphf) { uint32 nhashes; + char fbuf[BUFSIZ]; char *buf = NULL; uint32 buflen; uint32 i; + hash_state_t *state; czech_mphf_data_t *czech = (czech_mphf_data_t *)malloc(sizeof(czech_mphf_data_t)); DEBUGP("Loading czech mphf\n"); mphf->data = czech; fread(&nhashes, sizeof(uint32), 1, f); + nhashes = ntohl(nhashes); czech->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1)); czech->hashes[nhashes] = NULL; DEBUGP("Reading %u hashes\n", nhashes); @@ -265,6 +276,7 @@ void czech_mphf_load(FILE *f, mphf_t *mphf) { hash_state_t *state = NULL; fread(&buflen, sizeof(uint32), 1, f); + buflen = ntohl(buflen); DEBUGP("Hash state has %u bytes\n", buflen); buf = (char *)malloc(buflen); fread(buf, buflen, 1, f); @@ -275,17 +287,18 @@ void czech_mphf_load(FILE *f, mphf_t *mphf) DEBUGP("Reading m and n\n"); fread(&(czech->n), sizeof(uint32), 1, f); + czech->n = ntohl(czech->n); fread(&(czech->m), sizeof(uint32), 1, f); - + czech->m = ntohl(czech->m); + czech->g = (uint32 *)malloc(sizeof(uint32)*czech->n); fread(czech->g, czech->n*sizeof(uint32), 1, f); - /* + for (i = 0; i < czech->n; ++i) czech->g[i] = ntohl(czech->g[i]); #ifdef DEBUG fprintf(stderr, "G: "); for (i = 0; i < czech->n; ++i) fprintf(stderr, "%u ", czech->g[i]); fprintf(stderr, "\n"); #endif - */ return; }