diff --git a/src/bmz.c b/src/bmz.c index 98b8ec6..124f4a3 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -22,8 +22,9 @@ static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) static int bmz_gen_edges(mph_t *mph); -static void bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges); -static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_edges); +static uint8 bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited); +static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited); +static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_edges, uint8 * visited); mph_t *bmz_mph_new(key_source_t *key_source) @@ -72,12 +73,13 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c) { mphf_t *mphf = NULL; bmz_mphf_data_t *bmzf = NULL; - uint32 i; - uint32 iterations = 10; + uint32 iterations; + uint32 iterations_map = 20; uint8 *used_edges = NULL; - uint32 biggest_g_value = 0; - uint32 biggest_edge_value = 1; + uint8 restart_mapping = 0; + uint8 * visited = NULL; + DEBUGP("bmz_c: %f\n", bmz_c); bmz_mph_data_t *bmz = (bmz_mph_data_t *)mph->data; bmz->m = mph->key_source->nkeys; @@ -89,13 +91,18 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c) bmz->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3); for(i = 0; i < 3; ++i) bmz->hashes[i] = NULL; - // Mapping step - if (mph->verbosity) + do { + // Mapping step + uint32 biggest_g_value = 0; + uint32 biggest_edge_value = 1; + iterations = 20; + if (mph->verbosity) + { fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz->m, bmz->n); - } - while(1) - { + } + while(1) + { 
int ok; DEBUGP("hash function 1\n"); bmz->hashes[0] = hash_state_new(bmz->hashfuncs[0], bmz->n); @@ -118,53 +125,63 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c) if (iterations == 0) break; } else break; - } - if (iterations == 0) - { + } + if (iterations == 0) + { graph_destroy(bmz->graph); return NULL; - } + } - // Ordering step - if (mph->verbosity) - { + // Ordering step + if (mph->verbosity) + { fprintf(stderr, "Starting ordering step\n"); - } + } - graph_obtain_critical_nodes(bmz->graph); + graph_obtain_critical_nodes(bmz->graph); - // Searching step - if (mph->verbosity) - { + // Searching step + if (mph->verbosity) + { fprintf(stderr, "Starting Searching step.\n"); fprintf(stderr, "\tTraversing critical vertices.\n"); - } - DEBUGP("Searching step\n"); - - used_edges = (uint8 *)malloc((bmz->m*sizeof(uint8))/8 + 1); - memset(used_edges, 0, bmz->m/8 + 1); - free(bmz->g); - bmz->g = malloc(bmz->n * sizeof(uint32)); - assert(bmz->g); - for (i = 0; i < bmz->n; ++i) bmz->g[i] = UNDEFINED; - - for (i = 0; i < bmz->n; ++i) // critical nodes - { - if (graph_node_is_critical(bmz->graph, i) && (bmz->g[i] == UNDEFINED)) + } + DEBUGP("Searching step\n"); + visited = (uint8 *)malloc(bmz->n/8 + 1); /* one bit per vertex; the cast matches the uint8 * declaration of visited */ + memset(visited, 0, bmz->n/8 + 1); /* every vertex starts unvisited; this bitmap replaces the old g[i] == UNDEFINED test */ + used_edges = (uint8 *)malloc(bmz->m/8 + 1); + memset(used_edges, 0, bmz->m/8 + 1); + free(bmz->g); + bmz->g = malloc(bmz->n * sizeof(uint32)); + assert(bmz->g); + for (i = 0; i < bmz->n; ++i) // critical nodes + { + if (graph_node_is_critical(bmz->graph, i) && (!GETBIT(visited,i))) { - bmz_traverse_critical_nodes(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges); + if(bmz_c > 1.14) restart_mapping = bmz_traverse_critical_nodes(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited); + else restart_mapping = bmz_traverse_critical_nodes_heuristic(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited); + if(restart_mapping) break; /* a traversal returned non-zero: stop labelling and let the enclosing do..while redo the mapping step */ } - } - if (mph->verbosity) - { - fprintf(stderr, "\tTraversing non critical 
vertices.\n"); - } - - bmz_traverse_non_critical_nodes(bmz, used_edges); // non_critical_nodes + } + if(!restart_mapping) + { + if (mph->verbosity) + { + fprintf(stderr, "\tTraversing non critical vertices.\n"); + } + bmz_traverse_non_critical_nodes(bmz, used_edges, visited); // non_critical_nodes + } + else + { + iterations_map--; + if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map); + } + free(used_edges); + free(visited); + }while(restart_mapping && iterations_map > 0); graph_destroy(bmz->graph); - free(used_edges); bmz->graph = NULL; - + if (iterations_map == 0) return NULL; mphf = (mphf_t *)malloc(sizeof(mphf_t)); mphf->algo = mph->algo; bmzf = (bmz_mphf_data_t *)malloc(sizeof(bmz_mph_data_t)); @@ -184,19 +201,19 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c) return mphf; } -static void bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges) +static uint8 bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited) { uint32 next_g; uint32 u; /* Auxiliary vertex */ uint32 lav; /* lookahead vertex */ uint8 collision; - vqueue_t * q = vqueue_new((uint32)(0.5*graph_ncritical_nodes(bmz->graph))); + vqueue_t * q = vqueue_new((uint32)(0.5*graph_ncritical_nodes(bmz->graph)) + 1); graph_iterator_t it, it1; DEBUGP("Labelling critical vertices\n"); bmz->g[v] = (uint32)ceil ((double)(*biggest_edge_value)/2) - 1; + SETBIT(visited, v); next_g = (uint32)floor((double)(*biggest_edge_value/2)); /* next_g is incremented in the do..while statement*/ - *biggest_g_value = next_g; vqueue_insert(q, v); while(!vqueue_is_empty(q)) { @@ -204,7 +221,7 @@ static void bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * it = graph_neighbors_it(bmz->graph, v); while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) { - if 
(graph_node_is_critical(bmz->graph, u) && (bmz->g[u] == UNDEFINED)) + if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u))) { collision = 1; while(collision) // lookahead to resolve collisions @@ -214,9 +231,13 @@ static void bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * collision = 0; while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) { - if (graph_node_is_critical(bmz->graph, lav) && (bmz->g[lav] != UNDEFINED)) + if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav)) { - assert(next_g + bmz->g[lav] < bmz->m); + if(next_g + bmz->g[lav] >= bmz->m) + { + vqueue_destroy(q); + return 1; // restart mapping step. + } if (GETBIT(used_edges, next_g + bmz->g[lav])) { collision = 1; @@ -230,20 +251,115 @@ static void bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * it1 = graph_neighbors_it(bmz->graph, u); while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) { - if (graph_node_is_critical(bmz->graph, lav) && (bmz->g[lav] != UNDEFINED)) + if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav)) { SETBIT(used_edges,next_g + bmz->g[lav]); if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav]; } } bmz->g[u] = next_g; // Labelling vertex u. 
+ SETBIT(visited,u); vqueue_insert(q, u); } } } vqueue_destroy(q); - + return 0; +} + /* Heuristic variant of the critical-vertex labelling: for each unvisited critical neighbour it first retries the g values saved in unused_g_values and only then tries *biggest_g_value + 1. Returns 1 as soon as a candidate edge value next_g + g[lav] reaches bmz->m (the caller restarts the mapping step), 0 once the queue is drained. */ +static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited) +{ + uint32 next_g; + uint32 u; /* Auxiliary vertex */ + uint32 lav; /* lookahead vertex */ + uint8 collision; + uint32 * unused_g_values = NULL; + uint32 unused_g_values_capacity = 0; + uint32 nunused_g_values = 0; + vqueue_t * q = vqueue_new((uint32)(0.5*graph_ncritical_nodes(bmz->graph))+1); + graph_iterator_t it, it1; + + DEBUGP("Labelling critical vertices\n"); + bmz->g[v] = (uint32)ceil ((double)(*biggest_edge_value)/2) - 1; + SETBIT(visited, v); + next_g = (uint32)floor((double)(*biggest_edge_value/2)); /* placeholder only: next_g is reassigned inside the collision loop below before it is ever read */ + vqueue_insert(q, v); + while(!vqueue_is_empty(q)) + { + v = vqueue_remove(q); + it = graph_neighbors_it(bmz->graph, v); + while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) + { + if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u))) + { + uint32 next_g_index = 0; + collision = 1; + while(collision) // lookahead to resolve collisions + { + if (next_g_index < nunused_g_values) + { + next_g = unused_g_values[next_g_index++]; + } + else + { + next_g = *biggest_g_value + 1; + next_g_index = UINT_MAX; /* sentinel: this candidate did not come from unused_g_values */ + } + it1 = graph_neighbors_it(bmz->graph, u); + collision = 0; + while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) + { + if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav)) + { + if(next_g + bmz->g[lav] >= bmz->m) + { + vqueue_destroy(q); + free(unused_g_values); + return 1; // restart mapping step. + } + if (GETBIT(used_edges, next_g + bmz->g[lav])) + { + collision = 1; + break; + } + } + } + if(collision && (next_g > *biggest_g_value)) // saving the current g value stored in next_g. 
+ { + if(nunused_g_values == unused_g_values_capacity) + { + uint32 * grown = realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(uint32)); /* keep the old pointer until the growth is known to have succeeded */ + if(grown == NULL) { vqueue_destroy(q); free(unused_g_values); return 1; } /* out of memory: release the queue and the list, then report failure through the only non-zero result this function has */ + unused_g_values = grown; unused_g_values_capacity += BUFSIZ; + } + unused_g_values[nunused_g_values++] = next_g; + } + if (next_g > *biggest_g_value) *biggest_g_value = next_g; + } + next_g_index--; + if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values]; /* the accepted value came from the saved list: remove it by moving the last entry into its slot */ + + // Marking used edges... + it1 = graph_neighbors_it(bmz->graph, u); + while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR) + { + if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav)) + { + SETBIT(used_edges,next_g + bmz->g[lav]); + if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav]; + } + } + bmz->g[u] = next_g; // Labelling vertex u. + SETBIT(visited, u); + vqueue_insert(q, u); + } + } + + } + vqueue_destroy(q); + free(unused_g_values); + return 0; } static uint32 next_unused_edge(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 unused_edge_index) @@ -257,23 +373,24 @@ static uint32 next_unused_edge(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 u return unused_edge_index; } -static void bmz_traverse(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 v, uint32 * unused_edge_index) +static void bmz_traverse(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 v, uint32 * unused_edge_index, uint8 * visited) { graph_iterator_t it = graph_neighbors_it(bmz->graph, v); uint32 neighbor = 0; while((neighbor = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR) { + if(GETBIT(visited,neighbor)) continue; DEBUGP("Visiting neighbor %u\n", neighbor); - if(bmz->g[neighbor] != UNDEFINED) continue; *unused_edge_index = next_unused_edge(bmz, used_edges, *unused_edge_index); bmz->g[neighbor] = *unused_edge_index - bmz->g[v]; + SETBIT(visited, neighbor); (*unused_edge_index)++; - bmz_traverse(bmz, used_edges, neighbor, unused_edge_index); + bmz_traverse(bmz, used_edges, 
neighbor, unused_edge_index, visited); } } -static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_edges) +static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_edges, uint8 * visited) { uint32 i, v1, v2, unused_edge_index = 0; @@ -282,17 +399,19 @@ static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_ed { v1 = graph_vertex_id(bmz->graph, i, 0); v2 = graph_vertex_id(bmz->graph, i, 1); - if((bmz->g[v1] != UNDEFINED && bmz->g[v2] != UNDEFINED) || (bmz->g[v1] == UNDEFINED && bmz->g[v2] == UNDEFINED)) continue; - if(bmz->g[v1] != UNDEFINED) bmz_traverse(bmz, used_edges, v1, &unused_edge_index); - else bmz_traverse(bmz, used_edges, v2, &unused_edge_index); + if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue; + if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited); + else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited); + } for(i = 0; i < bmz->n; i++) { - if(bmz->g[i] == UNDEFINED) + if(!GETBIT(visited,i)) { bmz->g[i] = 0; - bmz_traverse(bmz, used_edges, i, &unused_edge_index); + SETBIT(visited, i); + bmz_traverse(bmz, used_edges, i, &unused_edge_index, visited); } } diff --git a/src/cmph.c b/src/cmph.c index 06d2a56..9b9bb3c 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -69,18 +69,18 @@ void mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs) return; } -mphf_t *mph_create(mph_t *mph) +mphf_t *mph_create(mph_t *mph, float c) { mphf_t *mphf = NULL; switch (mph->algo) { case MPH_CZECH: DEBUGP("Creating czech hash\n"); - mphf = czech_mph_create(mph, 2.09); + mphf = czech_mph_create(mph, c); break; case MPH_BMZ: /* included -- Fabiano */ DEBUGP("Creating bmz hash\n"); - mphf = bmz_mph_create(mph, 1.15); + mphf = bmz_mph_create(mph, c); break; default: assert(0); diff --git a/src/cmph.h b/src/cmph.h index 2224a42..c8cdbf2 100644 --- a/src/cmph.h +++ b/src/cmph.h @@ -28,7 +28,7 @@ mph_t *mph_new(MPH_ALGO 
algo, key_source_t *key_source); void mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs); void mph_set_verbosity(mph_t *mph, uint32 verbosity); void mph_destroy(mph_t *mph); -mphf_t *mph_create(mph_t *mph); +mphf_t *mph_create(mph_t *mph, float c); /** Hash querying API **/ mphf_t *mphf_load(FILE *f); diff --git a/src/graph.c b/src/graph.c index 00b3fca..c41f2c9 100644 --- a/src/graph.c +++ b/src/graph.c @@ -10,7 +10,7 @@ //#define DEBUG #include "debug.h" -static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; +static const uint8 bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; /* read-only table, bitmask[i] == 1 << i; the bit macros below only ever read it, so it stays const */ #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) @@ -52,7 +52,7 @@ graph_t *graph_new(uint32 nnodes, uint32 nedges) void graph_destroy(graph_t *graph) { - DEBUGP("Destroying graph\n"); + DEBUGP("Destroying graph\n"); free(graph->edges); free(graph->first); free(graph->next); @@ -103,7 +103,7 @@ void graph_add_edge(graph_t *g, uint32 v1, uint32 v2) static int check_edge(graph_t *g, uint32 e, uint32 v1, uint32 v2) { - DEBUGP("Checking edge %u %u looking for %u %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)], v1, v2); + DEBUGP("Checking edge %u %u looking for %u %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)], v1, v2); if (g->edges[abs_edge(e, 0)] == v1 && g->edges[abs_edge(e, 1)] == v2) return 1; if (g->edges[abs_edge(e, 0)] == v2 && g->edges[abs_edge(e, 1)] == v1) return 1; return 0; @@ -207,7 +207,7 @@ static void cyclic_del_edge(graph_t *g, uint32 v, char *deleted) if (!degree1) return; while(1) { - DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]); + DEBUGP("Deleting edge %u (%u->%u)\n", e, g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)]); SETBIT(deleted, abs_edge(e, 0)); v2 = g->edges[abs_edge(e, 0)]; @@ -217,7 +217,7 @@ 
static void cyclic_del_edge(graph_t *g, uint32 v, char *deleted) degree1 = find_degree1_edge(g, v2, deleted, &e); if (degree1) { - DEBUGP("Inspecting vertex %u\n", v2); + DEBUGP("Inspecting vertex %u\n", v2); v1 = v2; } else break; @@ -240,7 +240,7 @@ int graph_is_cyclic(graph_t *g) { if (!(GETBIT(deleted, i))) { - DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]); + DEBUGP("Edge %u %u->%u was not deleted\n", i, g->edges[i], g->edges[i + g->nedges]); free(deleted); return 1; } @@ -260,8 +260,10 @@ void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/ uint32 v; char *deleted = (char *)malloc((g->nedges*sizeof(char))/8+1); memset(deleted, 0, g->nedges/8 + 1); + free(g->critical_nodes); g->critical_nodes = (uint8 *)malloc((g->nnodes*sizeof(uint8))/8 + 1); g->ncritical_nodes = 0; + memset(g->critical_nodes, 0, (g->nnodes*sizeof(uint8))/8 + 1); DEBUGP("Looking for the 2-core in graph with %u vertices and %u edges\n", g->nnodes, g->nedges); for (v = 0; v < g->nnodes; ++v) { diff --git a/src/main.c b/src/main.c index ea93e3b..d092473 100644 --- a/src/main.c +++ b/src/main.c @@ -12,7 +12,7 @@ void usage(const char *prg) { - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); } void usage_long(const char *prg) { @@ -20,6 +20,7 @@ void usage_long(const char *prg) fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); fprintf(stderr, "Minimum perfect hashing tool\n\n"); fprintf(stderr, " -h\t print this help message\n"); + fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n"); fprintf(stderr, " -a\t algorithm - valid values are\n"); for (i = 0; i < MPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", mph_names[i]); fprintf(stderr, " -f\t hash function (may be 
used multiple times) - valid values are\n"); @@ -98,6 +99,7 @@ int main(int argc, char **argv) uint32 nhashes = 0; uint32 i; MPH_ALGO mph_algo = MPH_CZECH; + float c = 2.09; mph_t *mph = NULL; mphf_t *mphf = NULL; @@ -105,9 +107,9 @@ int main(int argc, char **argv) while (1) { - char c = getopt(argc, argv, "hVvk:a:f:gm:s:"); - if (c == -1) break; - switch (c) + int ch = getopt(argc, argv, "hVvgc:k:a:f:m:s:"); /* int, not char: getopt returns int, and where plain char is unsigned a char could never compare equal to -1 */ + if (ch == -1) break; + switch (ch) { case 's': { @@ -119,6 +121,16 @@ int main(int argc, char **argv) } } break; + case 'c': + { + char *endptr; + c = (float)strtod(optarg, &endptr); /* strtod yields double; narrow explicitly to the float that mph_create takes */ + if(endptr == optarg || *endptr != 0) { /* reject an argument with no digits as well as one with trailing characters */ + fprintf(stderr, "Invalid c value %s\n", optarg); + exit(1); + } + } + break; case 'g': generate = 1; break; @@ -229,7 +241,8 @@ int main(int argc, char **argv) mph = mph_new(mph_algo, &source); if (nhashes) mph_set_hashfuncs(mph, hashes); mph_set_verbosity(mph, verbosity); - mphf = mph_create(mph); + if(mph_algo == MPH_BMZ && c >= 2.0) c=1.15; /* swaps the 2.09 default for 1.15 under BMZ. NOTE(review): this also overrides an explicit -c >= 2.0 - confirm that is intended */ + mphf = mph_create(mph, c); if (mphf == NULL) {