version with cmph prefix
This commit is contained in:
parent
2c837e225e
commit
ea71f288b3
314
src/bmz.c
314
src/bmz.c
@ -13,33 +13,33 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
//static uint32 UNDEFINED = UINT_MAX;
|
//static cmph_uint32 UNDEFINED = UINT_MAX;
|
||||||
|
|
||||||
static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
||||||
#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8])
|
#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8])
|
||||||
#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
||||||
#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
||||||
|
|
||||||
static int bmz_gen_edges(mph_t *mph);
|
static int bmz_gen_edges(cmph_mph_t *mph);
|
||||||
static uint8 bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited);
|
static cmph_uint8 bmz_traverse_critical_nodes(cmph_bmz_mph_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited);
|
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(cmph_bmz_mph_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_edges, uint8 * visited);
|
static void bmz_traverse_non_critical_nodes(cmph_bmz_mph_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||||
|
|
||||||
|
|
||||||
mph_t *bmz_mph_new(key_source_t *key_source)
|
cmph_mph_t *cmph_bmz_mph_new(cmph_key_source_t *key_source)
|
||||||
{
|
{
|
||||||
mph_t *mph = NULL;
|
cmph_mph_t *mph = NULL;
|
||||||
bmz_mph_data_t *bmz = NULL;
|
cmph_bmz_mph_data_t *bmz = NULL;
|
||||||
mph = __mph_new(MPH_BMZ, key_source);
|
mph = cmph__mph_new(CMPH_BMZ, key_source);
|
||||||
if (mph == NULL) return NULL;
|
if (mph == NULL) return NULL;
|
||||||
bmz = (bmz_mph_data_t *)malloc(sizeof(bmz_mph_data_t));
|
bmz = (cmph_bmz_mph_data_t *)malloc(sizeof(cmph_bmz_mph_data_t));
|
||||||
if (bmz == NULL)
|
if (bmz == NULL)
|
||||||
{
|
{
|
||||||
__mph_destroy(mph);
|
cmph__mph_destroy(mph);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
bmz->hashfuncs[0] = HASH_JENKINS;
|
bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
bmz->hashfuncs[1] = HASH_JENKINS;
|
bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
bmz->g = NULL;
|
bmz->g = NULL;
|
||||||
bmz->graph = NULL;
|
bmz->graph = NULL;
|
||||||
bmz->hashes = NULL;
|
bmz->hashes = NULL;
|
||||||
@ -47,20 +47,20 @@ mph_t *bmz_mph_new(key_source_t *key_source)
|
|||||||
assert(mph->data);
|
assert(mph->data);
|
||||||
return mph;
|
return mph;
|
||||||
}
|
}
|
||||||
void bmz_mph_destroy(mph_t *mph)
|
void cmph_bmz_mph_destroy(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
bmz_mph_data_t *data = (bmz_mph_data_t *)mph->data;
|
cmph_bmz_mph_data_t *data = (cmph_bmz_mph_data_t *)mph->data;
|
||||||
DEBUGP("Destroying algorithm dependent data\n");
|
DEBUGP("Destroying algorithm dependent data\n");
|
||||||
free(data);
|
free(data);
|
||||||
__mph_destroy(mph);
|
cmph__mph_destroy(mph);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bmz_mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs)
|
void cmph_bmz_mph_set_hashfuncs(cmph_mph_t *mph, CMPH_HASH *hashfuncs)
|
||||||
{
|
{
|
||||||
bmz_mph_data_t *bmz = (bmz_mph_data_t *)mph->data;
|
cmph_bmz_mph_data_t *bmz = (cmph_bmz_mph_data_t *)mph->data;
|
||||||
CMPH_HASH *hashptr = hashfuncs;
|
CMPH_HASH *hashptr = hashfuncs;
|
||||||
uint32 i = 0;
|
cmph_uint32 i = 0;
|
||||||
while(*hashptr != HASH_COUNT)
|
while(*hashptr != CMPH_HASH_COUNT)
|
||||||
{
|
{
|
||||||
if (i >= 2) break; //bmz only uses two hash functions
|
if (i >= 2) break; //bmz only uses two hash functions
|
||||||
bmz->hashfuncs[i] = *hashptr;
|
bmz->hashfuncs[i] = *hashptr;
|
||||||
@ -68,33 +68,33 @@ void bmz_mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
cmph_mphf_t *cmph_bmz_mph_create(cmph_mph_t *mph, float bmz_c)
|
||||||
{
|
{
|
||||||
mphf_t *mphf = NULL;
|
cmph_mphf_t *mphf = NULL;
|
||||||
bmz_mphf_data_t *bmzf = NULL;
|
cmph_bmz_mphf_data_t *bmzf = NULL;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 iterations;
|
cmph_uint32 iterations;
|
||||||
uint32 iterations_map = 20;
|
cmph_uint32 iterations_map = 20;
|
||||||
uint8 *used_edges = NULL;
|
cmph_uint8 *used_edges = NULL;
|
||||||
uint8 restart_mapping = 0;
|
cmph_uint8 restart_mapping = 0;
|
||||||
uint8 * visited = NULL;
|
cmph_uint8 * visited = NULL;
|
||||||
|
|
||||||
DEBUGP("bmz_c: %f\n", bmz_c);
|
DEBUGP("bmz_c: %f\n", bmz_c);
|
||||||
bmz_mph_data_t *bmz = (bmz_mph_data_t *)mph->data;
|
cmph_bmz_mph_data_t *bmz = (cmph_bmz_mph_data_t *)mph->data;
|
||||||
bmz->m = mph->key_source->nkeys;
|
bmz->m = mph->key_source->nkeys;
|
||||||
bmz->n = ceil(bmz_c * mph->key_source->nkeys);
|
bmz->n = ceil(bmz_c * mph->key_source->nkeys);
|
||||||
DEBUGP("m (edges): %u n (vertices): %u bmz_c: %f\n", bmz->m, bmz->n, bmz_c);
|
DEBUGP("m (edges): %u n (vertices): %u bmz_c: %f\n", bmz->m, bmz->n, bmz_c);
|
||||||
bmz->graph = graph_new(bmz->n, bmz->m);
|
bmz->graph = cmph_graph_new(bmz->n, bmz->m);
|
||||||
DEBUGP("Created graph\n");
|
DEBUGP("Created graph\n");
|
||||||
|
|
||||||
bmz->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
|
bmz->hashes = (cmph_hash_state_t **)malloc(sizeof(cmph_hash_state_t *)*3);
|
||||||
for(i = 0; i < 3; ++i) bmz->hashes[i] = NULL;
|
for(i = 0; i < 3; ++i) bmz->hashes[i] = NULL;
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// Mapping step
|
// Mapping step
|
||||||
uint32 biggest_g_value = 0;
|
cmph_uint32 biggest_g_value = 0;
|
||||||
uint32 biggest_edge_value = 1;
|
cmph_uint32 biggest_edge_value = 1;
|
||||||
iterations = 20;
|
iterations = 20;
|
||||||
if (mph->verbosity)
|
if (mph->verbosity)
|
||||||
{
|
{
|
||||||
@ -104,17 +104,17 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
|||||||
{
|
{
|
||||||
int ok;
|
int ok;
|
||||||
DEBUGP("hash function 1\n");
|
DEBUGP("hash function 1\n");
|
||||||
bmz->hashes[0] = hash_state_new(bmz->hashfuncs[0], bmz->n);
|
bmz->hashes[0] = cmph_hash_state_new(bmz->hashfuncs[0], bmz->n);
|
||||||
DEBUGP("hash function 2\n");
|
DEBUGP("hash function 2\n");
|
||||||
bmz->hashes[1] = hash_state_new(bmz->hashfuncs[1], bmz->n);
|
bmz->hashes[1] = cmph_hash_state_new(bmz->hashfuncs[1], bmz->n);
|
||||||
DEBUGP("Generating edges\n");
|
DEBUGP("Generating edges\n");
|
||||||
ok = bmz_gen_edges(mph);
|
ok = bmz_gen_edges(mph);
|
||||||
if (!ok)
|
if (!ok)
|
||||||
{
|
{
|
||||||
--iterations;
|
--iterations;
|
||||||
hash_state_destroy(bmz->hashes[0]);
|
cmph_hash_state_destroy(bmz->hashes[0]);
|
||||||
bmz->hashes[0] = NULL;
|
bmz->hashes[0] = NULL;
|
||||||
hash_state_destroy(bmz->hashes[1]);
|
cmph_hash_state_destroy(bmz->hashes[1]);
|
||||||
bmz->hashes[1] = NULL;
|
bmz->hashes[1] = NULL;
|
||||||
DEBUGP("%u iterations remaining\n", iterations);
|
DEBUGP("%u iterations remaining\n", iterations);
|
||||||
if (mph->verbosity)
|
if (mph->verbosity)
|
||||||
@ -127,7 +127,7 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
|||||||
}
|
}
|
||||||
if (iterations == 0)
|
if (iterations == 0)
|
||||||
{
|
{
|
||||||
graph_destroy(bmz->graph);
|
cmph_graph_destroy(bmz->graph);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,7 +137,7 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
|||||||
fprintf(stderr, "Starting ordering step\n");
|
fprintf(stderr, "Starting ordering step\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
graph_obtain_critical_nodes(bmz->graph);
|
cmph_graph_obtain_critical_nodes(bmz->graph);
|
||||||
|
|
||||||
// Searching step
|
// Searching step
|
||||||
if (mph->verbosity)
|
if (mph->verbosity)
|
||||||
@ -148,14 +148,14 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
|||||||
DEBUGP("Searching step\n");
|
DEBUGP("Searching step\n");
|
||||||
visited = (char *)malloc(bmz->n/8 + 1);
|
visited = (char *)malloc(bmz->n/8 + 1);
|
||||||
memset(visited, 0, bmz->n/8 + 1);
|
memset(visited, 0, bmz->n/8 + 1);
|
||||||
used_edges = (uint8 *)malloc(bmz->m/8 + 1);
|
used_edges = (cmph_uint8 *)malloc(bmz->m/8 + 1);
|
||||||
memset(used_edges, 0, bmz->m/8 + 1);
|
memset(used_edges, 0, bmz->m/8 + 1);
|
||||||
free(bmz->g);
|
free(bmz->g);
|
||||||
bmz->g = malloc(bmz->n * sizeof(uint32));
|
bmz->g = malloc(bmz->n * sizeof(cmph_uint32));
|
||||||
assert(bmz->g);
|
assert(bmz->g);
|
||||||
for (i = 0; i < bmz->n; ++i) // critical nodes
|
for (i = 0; i < bmz->n; ++i) // critical nodes
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, i) && (!GETBIT(visited,i)))
|
if (cmph_graph_node_is_critical(bmz->graph, i) && (!GETBIT(visited,i)))
|
||||||
{
|
{
|
||||||
if(bmz_c > 1.14) restart_mapping = bmz_traverse_critical_nodes(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
|
if(bmz_c > 1.14) restart_mapping = bmz_traverse_critical_nodes(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
|
||||||
else restart_mapping = bmz_traverse_critical_nodes_heuristic(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
|
else restart_mapping = bmz_traverse_critical_nodes_heuristic(bmz, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
|
||||||
@ -178,12 +178,12 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
|||||||
free(used_edges);
|
free(used_edges);
|
||||||
free(visited);
|
free(visited);
|
||||||
}while(restart_mapping && iterations_map > 0);
|
}while(restart_mapping && iterations_map > 0);
|
||||||
graph_destroy(bmz->graph);
|
cmph_graph_destroy(bmz->graph);
|
||||||
bmz->graph = NULL;
|
bmz->graph = NULL;
|
||||||
if (iterations_map == 0) return NULL;
|
if (iterations_map == 0) return NULL;
|
||||||
mphf = (mphf_t *)malloc(sizeof(mphf_t));
|
mphf = (cmph_mphf_t *)malloc(sizeof(cmph_mphf_t));
|
||||||
mphf->algo = mph->algo;
|
mphf->algo = mph->algo;
|
||||||
bmzf = (bmz_mphf_data_t *)malloc(sizeof(bmz_mph_data_t));
|
bmzf = (cmph_bmz_mphf_data_t *)malloc(sizeof(cmph_bmz_mph_data_t));
|
||||||
bmzf->g = bmz->g;
|
bmzf->g = bmz->g;
|
||||||
bmz->g = NULL; //transfer memory ownership
|
bmz->g = NULL; //transfer memory ownership
|
||||||
bmzf->hashes = bmz->hashes;
|
bmzf->hashes = bmz->hashes;
|
||||||
@ -200,41 +200,41 @@ mphf_t *bmz_mph_create(mph_t *mph, float bmz_c)
|
|||||||
return mphf;
|
return mphf;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint8 bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited)
|
static cmph_uint8 bmz_traverse_critical_nodes(cmph_bmz_mph_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
|
||||||
{
|
{
|
||||||
uint32 next_g;
|
cmph_uint32 next_g;
|
||||||
uint32 u; /* Auxiliary vertex */
|
cmph_uint32 u; /* Auxiliary vertex */
|
||||||
uint32 lav; /* lookahead vertex */
|
cmph_uint32 lav; /* lookahead vertex */
|
||||||
uint8 collision;
|
cmph_uint8 collision;
|
||||||
vqueue_t * q = vqueue_new((uint32)(0.5*graph_ncritical_nodes(bmz->graph)) + 1);
|
cmph_vqueue_t * q = cmph_vqueue_new((cmph_uint32)(0.5*cmph_graph_ncritical_nodes(bmz->graph)) + 1);
|
||||||
graph_iterator_t it, it1;
|
cmph_graph_iterator_t it, it1;
|
||||||
|
|
||||||
DEBUGP("Labelling critical vertices\n");
|
DEBUGP("Labelling critical vertices\n");
|
||||||
bmz->g[v] = (uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
|
bmz->g[v] = (cmph_uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
|
||||||
SETBIT(visited, v);
|
SETBIT(visited, v);
|
||||||
next_g = (uint32)floor((double)(*biggest_edge_value/2)); /* next_g is incremented in the do..while statement*/
|
next_g = (cmph_uint32)floor((double)(*biggest_edge_value/2)); /* next_g is incremented in the do..while statement*/
|
||||||
vqueue_insert(q, v);
|
cmph_vqueue_insert(q, v);
|
||||||
while(!vqueue_is_empty(q))
|
while(!cmph_vqueue_is_empty(q))
|
||||||
{
|
{
|
||||||
v = vqueue_remove(q);
|
v = cmph_vqueue_remove(q);
|
||||||
it = graph_neighbors_it(bmz->graph, v);
|
it = cmph_graph_neighbors_it(bmz->graph, v);
|
||||||
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
while ((u = cmph_graph_next_neighbor(bmz->graph, &it)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
|
if (cmph_graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
|
||||||
{
|
{
|
||||||
collision = 1;
|
collision = 1;
|
||||||
while(collision) // lookahead to resolve collisions
|
while(collision) // lookahead to resolve collisions
|
||||||
{
|
{
|
||||||
next_g = *biggest_g_value + 1;
|
next_g = *biggest_g_value + 1;
|
||||||
it1 = graph_neighbors_it(bmz->graph, u);
|
it1 = cmph_graph_neighbors_it(bmz->graph, u);
|
||||||
collision = 0;
|
collision = 0;
|
||||||
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
while((lav = cmph_graph_next_neighbor(bmz->graph, &it1)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav))
|
if (cmph_graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav))
|
||||||
{
|
{
|
||||||
if(next_g + bmz->g[lav] >= bmz->m)
|
if(next_g + bmz->g[lav] >= bmz->m)
|
||||||
{
|
{
|
||||||
vqueue_destroy(q);
|
cmph_vqueue_destroy(q);
|
||||||
return 1; // restart mapping step.
|
return 1; // restart mapping step.
|
||||||
}
|
}
|
||||||
if (GETBIT(used_edges, next_g + bmz->g[lav]))
|
if (GETBIT(used_edges, next_g + bmz->g[lav]))
|
||||||
@ -247,10 +247,10 @@ static uint8 bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 *
|
|||||||
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
|
if (next_g > *biggest_g_value) *biggest_g_value = next_g;
|
||||||
}
|
}
|
||||||
// Marking used edges...
|
// Marking used edges...
|
||||||
it1 = graph_neighbors_it(bmz->graph, u);
|
it1 = cmph_graph_neighbors_it(bmz->graph, u);
|
||||||
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
while((lav = cmph_graph_next_neighbor(bmz->graph, &it1)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
|
if (cmph_graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
|
||||||
{
|
{
|
||||||
SETBIT(used_edges,next_g + bmz->g[lav]);
|
SETBIT(used_edges,next_g + bmz->g[lav]);
|
||||||
if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav];
|
if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav];
|
||||||
@ -258,41 +258,41 @@ static uint8 bmz_traverse_critical_nodes(bmz_mph_data_t *bmz, uint32 v, uint32 *
|
|||||||
}
|
}
|
||||||
bmz->g[u] = next_g; // Labelling vertex u.
|
bmz->g[u] = next_g; // Labelling vertex u.
|
||||||
SETBIT(visited,u);
|
SETBIT(visited,u);
|
||||||
vqueue_insert(q, u);
|
cmph_vqueue_insert(q, u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
vqueue_destroy(q);
|
cmph_vqueue_destroy(q);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v, uint32 * biggest_g_value, uint32 * biggest_edge_value, uint8 * used_edges, uint8 * visited)
|
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(cmph_bmz_mph_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited)
|
||||||
{
|
{
|
||||||
uint32 next_g;
|
cmph_uint32 next_g;
|
||||||
uint32 u; /* Auxiliary vertex */
|
cmph_uint32 u; /* Auxiliary vertex */
|
||||||
uint32 lav; /* lookahead vertex */
|
cmph_uint32 lav; /* lookahead vertex */
|
||||||
uint8 collision;
|
cmph_uint8 collision;
|
||||||
uint32 * unused_g_values = NULL;
|
cmph_uint32 * unused_g_values = NULL;
|
||||||
uint32 unused_g_values_capacity = 0;
|
cmph_uint32 unused_g_values_capacity = 0;
|
||||||
uint32 nunused_g_values = 0;
|
cmph_uint32 nunused_g_values = 0;
|
||||||
vqueue_t * q = vqueue_new((uint32)(0.5*graph_ncritical_nodes(bmz->graph))+1);
|
cmph_vqueue_t * q = cmph_vqueue_new((cmph_uint32)(0.5*cmph_graph_ncritical_nodes(bmz->graph))+1);
|
||||||
graph_iterator_t it, it1;
|
cmph_graph_iterator_t it, it1;
|
||||||
|
|
||||||
DEBUGP("Labelling critical vertices\n");
|
DEBUGP("Labelling critical vertices\n");
|
||||||
bmz->g[v] = (uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
|
bmz->g[v] = (cmph_uint32)ceil ((double)(*biggest_edge_value)/2) - 1;
|
||||||
SETBIT(visited, v);
|
SETBIT(visited, v);
|
||||||
next_g = (uint32)floor((double)(*biggest_edge_value/2)); /* next_g is incremented in the do..while statement*/
|
next_g = (cmph_uint32)floor((double)(*biggest_edge_value/2)); /* next_g is incremented in the do..while statement*/
|
||||||
vqueue_insert(q, v);
|
cmph_vqueue_insert(q, v);
|
||||||
while(!vqueue_is_empty(q))
|
while(!cmph_vqueue_is_empty(q))
|
||||||
{
|
{
|
||||||
v = vqueue_remove(q);
|
v = cmph_vqueue_remove(q);
|
||||||
it = graph_neighbors_it(bmz->graph, v);
|
it = cmph_graph_neighbors_it(bmz->graph, v);
|
||||||
while ((u = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
while ((u = cmph_graph_next_neighbor(bmz->graph, &it)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
|
if (cmph_graph_node_is_critical(bmz->graph, u) && (!GETBIT(visited,u)))
|
||||||
{
|
{
|
||||||
uint32 next_g_index = 0;
|
cmph_uint32 next_g_index = 0;
|
||||||
collision = 1;
|
collision = 1;
|
||||||
while(collision) // lookahead to resolve collisions
|
while(collision) // lookahead to resolve collisions
|
||||||
{
|
{
|
||||||
@ -305,15 +305,15 @@ static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v
|
|||||||
next_g = *biggest_g_value + 1;
|
next_g = *biggest_g_value + 1;
|
||||||
next_g_index = UINT_MAX;
|
next_g_index = UINT_MAX;
|
||||||
}
|
}
|
||||||
it1 = graph_neighbors_it(bmz->graph, u);
|
it1 = cmph_graph_neighbors_it(bmz->graph, u);
|
||||||
collision = 0;
|
collision = 0;
|
||||||
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
while((lav = cmph_graph_next_neighbor(bmz->graph, &it1)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav))
|
if (cmph_graph_node_is_critical(bmz->graph, lav) && GETBIT(visited,lav))
|
||||||
{
|
{
|
||||||
if(next_g + bmz->g[lav] >= bmz->m)
|
if(next_g + bmz->g[lav] >= bmz->m)
|
||||||
{
|
{
|
||||||
vqueue_destroy(q);
|
cmph_vqueue_destroy(q);
|
||||||
free(unused_g_values);
|
free(unused_g_values);
|
||||||
return 1; // restart mapping step.
|
return 1; // restart mapping step.
|
||||||
}
|
}
|
||||||
@ -328,7 +328,7 @@ static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v
|
|||||||
{
|
{
|
||||||
if(nunused_g_values == unused_g_values_capacity)
|
if(nunused_g_values == unused_g_values_capacity)
|
||||||
{
|
{
|
||||||
unused_g_values = realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(uint32));
|
unused_g_values = realloc(unused_g_values, (unused_g_values_capacity + BUFSIZ)*sizeof(cmph_uint32));
|
||||||
unused_g_values_capacity += BUFSIZ;
|
unused_g_values_capacity += BUFSIZ;
|
||||||
}
|
}
|
||||||
unused_g_values[nunused_g_values++] = next_g;
|
unused_g_values[nunused_g_values++] = next_g;
|
||||||
@ -340,10 +340,10 @@ static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v
|
|||||||
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
|
if (next_g_index < nunused_g_values) unused_g_values[next_g_index] = unused_g_values[--nunused_g_values];
|
||||||
|
|
||||||
// Marking used edges...
|
// Marking used edges...
|
||||||
it1 = graph_neighbors_it(bmz->graph, u);
|
it1 = cmph_graph_neighbors_it(bmz->graph, u);
|
||||||
while((lav = graph_next_neighbor(bmz->graph, &it1)) != GRAPH_NO_NEIGHBOR)
|
while((lav = cmph_graph_next_neighbor(bmz->graph, &it1)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if (graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
|
if (cmph_graph_node_is_critical(bmz->graph, lav) && GETBIT(visited, lav))
|
||||||
{
|
{
|
||||||
SETBIT(used_edges,next_g + bmz->g[lav]);
|
SETBIT(used_edges,next_g + bmz->g[lav]);
|
||||||
if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav];
|
if(next_g + bmz->g[lav] > *biggest_edge_value) *biggest_edge_value = next_g + bmz->g[lav];
|
||||||
@ -351,17 +351,17 @@ static uint8 bmz_traverse_critical_nodes_heuristic(bmz_mph_data_t *bmz, uint32 v
|
|||||||
}
|
}
|
||||||
bmz->g[u] = next_g; // Labelling vertex u.
|
bmz->g[u] = next_g; // Labelling vertex u.
|
||||||
SETBIT(visited, u);
|
SETBIT(visited, u);
|
||||||
vqueue_insert(q, u);
|
cmph_vqueue_insert(q, u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
vqueue_destroy(q);
|
cmph_vqueue_destroy(q);
|
||||||
free(unused_g_values);
|
free(unused_g_values);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32 next_unused_edge(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 unused_edge_index)
|
static cmph_uint32 next_unused_edge(cmph_bmz_mph_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 unused_edge_index)
|
||||||
{
|
{
|
||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
@ -372,11 +372,11 @@ static uint32 next_unused_edge(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 u
|
|||||||
return unused_edge_index;
|
return unused_edge_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bmz_traverse(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 v, uint32 * unused_edge_index, uint8 * visited)
|
static void bmz_traverse(cmph_bmz_mph_data_t *bmz, cmph_uint8 * used_edges, cmph_uint32 v, cmph_uint32 * unused_edge_index, cmph_uint8 * visited)
|
||||||
{
|
{
|
||||||
graph_iterator_t it = graph_neighbors_it(bmz->graph, v);
|
cmph_graph_iterator_t it = cmph_graph_neighbors_it(bmz->graph, v);
|
||||||
uint32 neighbor = 0;
|
cmph_uint32 neighbor = 0;
|
||||||
while((neighbor = graph_next_neighbor(bmz->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
while((neighbor = cmph_graph_next_neighbor(bmz->graph, &it)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
if(GETBIT(visited,neighbor)) continue;
|
if(GETBIT(visited,neighbor)) continue;
|
||||||
DEBUGP("Visiting neighbor %u\n", neighbor);
|
DEBUGP("Visiting neighbor %u\n", neighbor);
|
||||||
@ -389,15 +389,15 @@ static void bmz_traverse(bmz_mph_data_t *bmz, uint8 * used_edges, uint32 v, uint
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_edges, uint8 * visited)
|
static void bmz_traverse_non_critical_nodes(cmph_bmz_mph_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited)
|
||||||
{
|
{
|
||||||
|
|
||||||
uint32 i, v1, v2, unused_edge_index = 0;
|
cmph_uint32 i, v1, v2, unused_edge_index = 0;
|
||||||
DEBUGP("Labelling non critical vertices\n");
|
DEBUGP("Labelling non critical vertices\n");
|
||||||
for(i = 0; i < bmz->m; i++)
|
for(i = 0; i < bmz->m; i++)
|
||||||
{
|
{
|
||||||
v1 = graph_vertex_id(bmz->graph, i, 0);
|
v1 = cmph_graph_vertex_id(bmz->graph, i, 0);
|
||||||
v2 = graph_vertex_id(bmz->graph, i, 1);
|
v2 = cmph_graph_vertex_id(bmz->graph, i, 1);
|
||||||
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
|
if((GETBIT(visited,v1) && GETBIT(visited,v2)) || (!GETBIT(visited,v1) && !GETBIT(visited,v2))) continue;
|
||||||
if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited);
|
if(GETBIT(visited,v1)) bmz_traverse(bmz, used_edges, v1, &unused_edge_index, visited);
|
||||||
else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited);
|
else bmz_traverse(bmz, used_edges, v2, &unused_edge_index, visited);
|
||||||
@ -416,23 +416,23 @@ static void bmz_traverse_non_critical_nodes(bmz_mph_data_t *bmz, uint8 * used_ed
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bmz_gen_edges(mph_t *mph)
|
static int bmz_gen_edges(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
uint32 e;
|
cmph_uint32 e;
|
||||||
bmz_mph_data_t *bmz = (bmz_mph_data_t *)mph->data;
|
cmph_bmz_mph_data_t *bmz = (cmph_bmz_mph_data_t *)mph->data;
|
||||||
uint8 multiple_edges = 0;
|
cmph_uint8 multiple_edges = 0;
|
||||||
|
|
||||||
DEBUGP("Generating edges for %u vertices\n", bmz->n);
|
DEBUGP("Generating edges for %u vertices\n", bmz->n);
|
||||||
graph_clear_edges(bmz->graph);
|
cmph_graph_clear_edges(bmz->graph);
|
||||||
mph->key_source->rewind(mph->key_source->data);
|
mph->key_source->rewind(mph->key_source->data);
|
||||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||||
{
|
{
|
||||||
uint32 h1, h2;
|
cmph_uint32 h1, h2;
|
||||||
uint32 keylen;
|
cmph_uint32 keylen;
|
||||||
char *key;
|
char *key;
|
||||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||||
h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
|
h1 = cmph_hash(bmz->hashes[0], key, keylen) % bmz->n;
|
||||||
h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
|
h2 = cmph_hash(bmz->hashes[1], key, keylen) % bmz->n;
|
||||||
if (h1 == h2) if (++h2 >= bmz->n) h2 = 0;
|
if (h1 == h2) if (++h2 >= bmz->n) h2 = 0;
|
||||||
if (h1 == h2)
|
if (h1 == h2)
|
||||||
{
|
{
|
||||||
@ -442,43 +442,43 @@ static int bmz_gen_edges(mph_t *mph)
|
|||||||
}
|
}
|
||||||
DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
|
DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
|
||||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||||
multiple_edges = graph_contains_edge(bmz->graph, h1, h2);
|
multiple_edges = cmph_graph_contains_edge(bmz->graph, h1, h2);
|
||||||
if (mph->verbosity && multiple_edges) fprintf(stderr, "A non simple graph was generated\n");
|
if (mph->verbosity && multiple_edges) fprintf(stderr, "A non simple graph was generated\n");
|
||||||
if (multiple_edges) return 0; // checking multiple edge restriction.
|
if (multiple_edges) return 0; // checking multiple edge restriction.
|
||||||
graph_add_edge(bmz->graph, h1, h2);
|
cmph_graph_add_edge(bmz->graph, h1, h2);
|
||||||
}
|
}
|
||||||
return !multiple_edges;
|
return !multiple_edges;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bmz_mphf_dump(mphf_t *mphf, FILE *fd)
|
int cmph_bmz_mphf_dump(cmph_mphf_t *mphf, FILE *fd)
|
||||||
{
|
{
|
||||||
char *buf = NULL;
|
char *buf = NULL;
|
||||||
uint32 buflen;
|
cmph_uint32 buflen;
|
||||||
uint32 nbuflen;
|
cmph_uint32 nbuflen;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 two = 2; //number of hash functions
|
cmph_uint32 two = 2; //number of hash functions
|
||||||
bmz_mphf_data_t *data = (bmz_mphf_data_t *)mphf->data;
|
cmph_bmz_mphf_data_t *data = (cmph_bmz_mphf_data_t *)mphf->data;
|
||||||
uint32 nn, nm;
|
cmph_uint32 nn, nm;
|
||||||
__mphf_dump(mphf, fd);
|
cmph__mphf_dump(mphf, fd);
|
||||||
|
|
||||||
fwrite(&two, sizeof(uint32), 1, fd);
|
fwrite(&two, sizeof(cmph_uint32), 1, fd);
|
||||||
|
|
||||||
hash_state_dump(data->hashes[0], &buf, &buflen);
|
cmph_hash_state_dump(data->hashes[0], &buf, &buflen);
|
||||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
fwrite(&buflen, sizeof(uint32), 1, fd);
|
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
|
||||||
fwrite(buf, buflen, 1, fd);
|
fwrite(buf, buflen, 1, fd);
|
||||||
free(buf);
|
free(buf);
|
||||||
|
|
||||||
hash_state_dump(data->hashes[1], &buf, &buflen);
|
cmph_hash_state_dump(data->hashes[1], &buf, &buflen);
|
||||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
fwrite(&buflen, sizeof(uint32), 1, fd);
|
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
|
||||||
fwrite(buf, buflen, 1, fd);
|
fwrite(buf, buflen, 1, fd);
|
||||||
free(buf);
|
free(buf);
|
||||||
|
|
||||||
fwrite(&(data->n), sizeof(uint32), 1, fd);
|
fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
|
||||||
fwrite(&(data->m), sizeof(uint32), 1, fd);
|
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
|
||||||
|
|
||||||
fwrite(data->g, sizeof(uint32)*(data->n), 1, fd);
|
fwrite(data->g, sizeof(cmph_uint32)*(data->n), 1, fd);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
fprintf(stderr, "G: ");
|
fprintf(stderr, "G: ");
|
||||||
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
|
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
|
||||||
@ -487,38 +487,38 @@ int bmz_mphf_dump(mphf_t *mphf, FILE *fd)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bmz_mphf_load(FILE *f, mphf_t *mphf)
|
void cmph_bmz_mphf_load(FILE *f, cmph_mphf_t *mphf)
|
||||||
{
|
{
|
||||||
uint32 nhashes;
|
cmph_uint32 nhashes;
|
||||||
char *buf = NULL;
|
char *buf = NULL;
|
||||||
uint32 buflen;
|
cmph_uint32 buflen;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
bmz_mphf_data_t *bmz = (bmz_mphf_data_t *)malloc(sizeof(bmz_mphf_data_t));
|
cmph_bmz_mphf_data_t *bmz = (cmph_bmz_mphf_data_t *)malloc(sizeof(cmph_bmz_mphf_data_t));
|
||||||
|
|
||||||
DEBUGP("Loading bmz mphf\n");
|
DEBUGP("Loading bmz mphf\n");
|
||||||
mphf->data = bmz;
|
mphf->data = bmz;
|
||||||
fread(&nhashes, sizeof(uint32), 1, f);
|
fread(&nhashes, sizeof(cmph_uint32), 1, f);
|
||||||
bmz->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
|
bmz->hashes = (cmph_hash_state_t **)malloc(sizeof(cmph_hash_state_t *)*(nhashes + 1));
|
||||||
bmz->hashes[nhashes] = NULL;
|
bmz->hashes[nhashes] = NULL;
|
||||||
DEBUGP("Reading %u hashes\n", nhashes);
|
DEBUGP("Reading %u hashes\n", nhashes);
|
||||||
for (i = 0; i < nhashes; ++i)
|
for (i = 0; i < nhashes; ++i)
|
||||||
{
|
{
|
||||||
hash_state_t *state = NULL;
|
cmph_hash_state_t *state = NULL;
|
||||||
fread(&buflen, sizeof(uint32), 1, f);
|
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
||||||
DEBUGP("Hash state has %u bytes\n", buflen);
|
DEBUGP("Hash state has %u bytes\n", buflen);
|
||||||
buf = (char *)malloc(buflen);
|
buf = (char *)malloc(buflen);
|
||||||
fread(buf, buflen, 1, f);
|
fread(buf, buflen, 1, f);
|
||||||
state = hash_state_load(buf, buflen);
|
state = cmph_hash_state_load(buf, buflen);
|
||||||
bmz->hashes[i] = state;
|
bmz->hashes[i] = state;
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUGP("Reading m and n\n");
|
DEBUGP("Reading m and n\n");
|
||||||
fread(&(bmz->n), sizeof(uint32), 1, f);
|
fread(&(bmz->n), sizeof(cmph_uint32), 1, f);
|
||||||
fread(&(bmz->m), sizeof(uint32), 1, f);
|
fread(&(bmz->m), sizeof(cmph_uint32), 1, f);
|
||||||
|
|
||||||
bmz->g = (uint32 *)malloc(sizeof(uint32)*bmz->n);
|
bmz->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*bmz->n);
|
||||||
fread(bmz->g, bmz->n*sizeof(uint32), 1, f);
|
fread(bmz->g, bmz->n*sizeof(cmph_uint32), 1, f);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
fprintf(stderr, "G: ");
|
fprintf(stderr, "G: ");
|
||||||
for (i = 0; i < bmz->n; ++i) fprintf(stderr, "%u ", bmz->g[i]);
|
for (i = 0; i < bmz->n; ++i) fprintf(stderr, "%u ", bmz->g[i]);
|
||||||
@ -528,22 +528,22 @@ void bmz_mphf_load(FILE *f, mphf_t *mphf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32 bmz_mphf_search(mphf_t *mphf, const char *key, uint32 keylen)
|
cmph_uint32 cmph_bmz_mphf_search(cmph_mphf_t *mphf, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
bmz_mphf_data_t *bmz = mphf->data;
|
cmph_bmz_mphf_data_t *bmz = mphf->data;
|
||||||
uint32 h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
|
cmph_uint32 h1 = cmph_hash(bmz->hashes[0], key, keylen) % bmz->n;
|
||||||
uint32 h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
|
cmph_uint32 h2 = cmph_hash(bmz->hashes[1], key, keylen) % bmz->n;
|
||||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
if (h1 == h2 && ++h2 > bmz->n) h2 = 0;
|
if (h1 == h2 && ++h2 > bmz->n) h2 = 0;
|
||||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
|
||||||
return bmz->g[h1] + bmz->g[h2];
|
return bmz->g[h1] + bmz->g[h2];
|
||||||
}
|
}
|
||||||
void bmz_mphf_destroy(mphf_t *mphf)
|
void cmph_bmz_mphf_destroy(cmph_mphf_t *mphf)
|
||||||
{
|
{
|
||||||
bmz_mphf_data_t *data = (bmz_mphf_data_t *)mphf->data;
|
cmph_bmz_mphf_data_t *data = (cmph_bmz_mphf_data_t *)mphf->data;
|
||||||
free(data->g);
|
free(data->g);
|
||||||
hash_state_destroy(data->hashes[0]);
|
cmph_hash_state_destroy(data->hashes[0]);
|
||||||
hash_state_destroy(data->hashes[1]);
|
cmph_hash_state_destroy(data->hashes[1]);
|
||||||
free(data->hashes);
|
free(data->hashes);
|
||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
|
20
src/bmz.h
20
src/bmz.h
@ -4,16 +4,16 @@
|
|||||||
#include "graph.h"
|
#include "graph.h"
|
||||||
#include "cmph.h"
|
#include "cmph.h"
|
||||||
|
|
||||||
typedef struct __bmz_mphf_data_t bmz_mphf_data_t;
|
typedef struct cmph__bmz_mphf_data_t cmph_bmz_mphf_data_t;
|
||||||
typedef struct __bmz_mph_data_t bmz_mph_data_t;
|
typedef struct cmph__bmz_mph_data_t cmph_bmz_mph_data_t;
|
||||||
|
|
||||||
mph_t *bmz_mph_new(key_source_t *key_source);
|
cmph_mph_t *cmph_bmz_mph_new(cmph_key_source_t *key_source);
|
||||||
void bmz_mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs);
|
void cmph_bmz_mph_set_hashfuncs(cmph_mph_t *mph, CMPH_HASH *hashfuncs);
|
||||||
void bmz_mph_destroy(mph_t *mph);
|
void cmph_bmz_mph_destroy(cmph_mph_t *mph);
|
||||||
mphf_t *bmz_mph_create(mph_t *mph, float bmz_c);
|
cmph_mphf_t *cmph_bmz_mph_create(cmph_mph_t *mph, float bmz_c);
|
||||||
|
|
||||||
void bmz_mphf_load(FILE *f, mphf_t *mphf);
|
void cmph_bmz_mphf_load(FILE *f, cmph_mphf_t *mphf);
|
||||||
int bmz_mphf_dump(mphf_t *mphf, FILE *f);
|
int cmph_bmz_mphf_dump(cmph_mphf_t *mphf, FILE *f);
|
||||||
void bmz_mphf_destroy(mphf_t *mphf);
|
void cmph_bmz_mphf_destroy(cmph_mphf_t *mphf);
|
||||||
uint32 bmz_mphf_search(mphf_t *mphf, const char *key, uint32 keylen);
|
cmph_uint32 cmph_bmz_mphf_search(cmph_mphf_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,22 +3,22 @@
|
|||||||
|
|
||||||
#include "hash_state.h"
|
#include "hash_state.h"
|
||||||
|
|
||||||
struct __bmz_mphf_data_t
|
struct cmph__bmz_mphf_data_t
|
||||||
{
|
{
|
||||||
uint32 m; //edges (words) count
|
cmph_uint32 m; //edges (words) count
|
||||||
uint32 n; //vertex count
|
cmph_uint32 n; //vertex count
|
||||||
uint32 *g;
|
cmph_uint32 *g;
|
||||||
hash_state_t **hashes;
|
cmph_hash_state_t **hashes;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __bmz_mph_data_t
|
struct cmph__bmz_mph_data_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfuncs[2];
|
CMPH_HASH hashfuncs[2];
|
||||||
uint32 m; //edges (words) count
|
cmph_uint32 m; //edges (words) count
|
||||||
uint32 n; //vertex count
|
cmph_uint32 n; //vertex count
|
||||||
graph_t *graph;
|
cmph_graph_t *graph;
|
||||||
uint32 *g;
|
cmph_uint32 *g;
|
||||||
hash_state_t **hashes;
|
cmph_hash_state_t **hashes;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
100
src/cmph.c
100
src/cmph.c
@ -10,20 +10,20 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
const char *mph_names[] = { "czech", "bmz", NULL }; /* included -- Fabiano */
|
const char *cmph_names[] = { "czech", "bmz", NULL }; /* included -- Fabiano */
|
||||||
|
|
||||||
mph_t *mph_new(MPH_ALGO algo, key_source_t *key_source)
|
cmph_mph_t *cmph_mph_new(CMPH_ALGO algo, cmph_key_source_t *key_source)
|
||||||
{
|
{
|
||||||
mph_t *mph = NULL;
|
cmph_mph_t *mph = NULL;
|
||||||
DEBUGP("Creating mph with algorithm %s\n", mph_names[algo]);
|
DEBUGP("Creating mph with algorithm %s\n", cmph_names[algo]);
|
||||||
switch (algo)
|
switch (algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
mph = czech_mph_new(key_source);
|
mph = cmph_czech_mph_new(key_source);
|
||||||
break;
|
break;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
DEBUGP("new bmz algorithm \n");
|
DEBUGP("new bmz algorithm \n");
|
||||||
mph = bmz_mph_new(key_source);
|
mph = cmph_bmz_mph_new(key_source);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -32,63 +32,63 @@ mph_t *mph_new(MPH_ALGO algo, key_source_t *key_source)
|
|||||||
return mph;
|
return mph;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mph_destroy(mph_t *mph)
|
void cmph_mph_destroy(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
DEBUGP("Destroying mph with algo %s\n", mph_names[mph->algo]);
|
DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
|
||||||
switch (mph->algo)
|
switch (mph->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
czech_mph_destroy(mph);
|
cmph_czech_mph_destroy(mph);
|
||||||
break;
|
break;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
bmz_mph_destroy(mph);
|
cmph_bmz_mph_destroy(mph);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void mph_set_verbosity(mph_t *mph, uint32 verbosity)
|
void cmph_mph_set_verbosity(cmph_mph_t *mph, cmph_uint32 verbosity)
|
||||||
{
|
{
|
||||||
mph->verbosity = verbosity;
|
mph->verbosity = verbosity;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs)
|
void cmph_mph_set_hashfuncs(cmph_mph_t *mph, CMPH_HASH *hashfuncs)
|
||||||
{
|
{
|
||||||
switch (mph->algo)
|
switch (mph->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
czech_mph_set_hashfuncs(mph, hashfuncs);
|
cmph_czech_mph_set_hashfuncs(mph, hashfuncs);
|
||||||
break;
|
break;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
bmz_mph_set_hashfuncs(mph, hashfuncs);
|
cmph_bmz_mph_set_hashfuncs(mph, hashfuncs);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
void mph_set_graphsize(mph_t *mph, float c)
|
void cmph_mph_set_graphsize(cmph_mph_t *mph, float c)
|
||||||
{
|
{
|
||||||
mph->c = c;
|
mph->c = c;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mphf_t *mph_create(mph_t *mph)
|
cmph_mphf_t *cmph_mph_create(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
mphf_t *mphf = NULL;
|
cmph_mphf_t *mphf = NULL;
|
||||||
float c = mph->c;
|
float c = mph->c;
|
||||||
switch (mph->algo)
|
switch (mph->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
DEBUGP("Creating czech hash\n");
|
DEBUGP("Creating czech hash\n");
|
||||||
if (c == 0) c = 2.09;
|
if (c == 0) c = 2.09;
|
||||||
mphf = czech_mph_create(mph, c);
|
mphf = cmph_czech_mph_create(mph, c);
|
||||||
break;
|
break;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
DEBUGP("Creating bmz hash\n");
|
DEBUGP("Creating bmz hash\n");
|
||||||
if (c == 0) c = 1.15;
|
if (c == 0) c = 1.15;
|
||||||
mphf = bmz_mph_create(mph, c);
|
mphf = cmph_bmz_mph_create(mph, c);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -96,15 +96,15 @@ mphf_t *mph_create(mph_t *mph)
|
|||||||
return mphf;
|
return mphf;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mphf_dump(mphf_t *mphf, FILE *f)
|
int cmph_mphf_dump(cmph_mphf_t *mphf, FILE *f)
|
||||||
{
|
{
|
||||||
switch (mphf->algo)
|
switch (mphf->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
return czech_mphf_dump(mphf, f);
|
return cmph_czech_mphf_dump(mphf, f);
|
||||||
break;
|
break;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
return bmz_mphf_dump(mphf, f);
|
return cmph_bmz_mphf_dump(mphf, f);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -112,22 +112,22 @@ int mphf_dump(mphf_t *mphf, FILE *f)
|
|||||||
assert(0);
|
assert(0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
mphf_t *mphf_load(FILE *f)
|
cmph_mphf_t *cmph_mphf_load(FILE *f)
|
||||||
{
|
{
|
||||||
mphf_t *mphf = NULL;
|
cmph_mphf_t *mphf = NULL;
|
||||||
DEBUGP("Loading mphf generic parts\n");
|
DEBUGP("Loading mphf generic parts\n");
|
||||||
mphf = __mphf_load(f);
|
mphf = cmph__mphf_load(f);
|
||||||
if (mphf == NULL) return NULL;
|
if (mphf == NULL) return NULL;
|
||||||
DEBUGP("Loading mphf algorithm dependent parts\n");
|
DEBUGP("Loading mphf algorithm dependent parts\n");
|
||||||
|
|
||||||
switch (mphf->algo)
|
switch (mphf->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
czech_mphf_load(f, mphf);
|
cmph_czech_mphf_load(f, mphf);
|
||||||
break;
|
break;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
DEBUGP("Loading bmz algorithm dependent parts\n");
|
DEBUGP("Loading bmz algorithm dependent parts\n");
|
||||||
bmz_mphf_load(f, mphf);
|
cmph_bmz_mphf_load(f, mphf);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -137,16 +137,16 @@ mphf_t *mphf_load(FILE *f)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32 mphf_search(mphf_t *mphf, const char *key, uint32 keylen)
|
cmph_uint32 cmph_mphf_search(cmph_mphf_t *mphf, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
DEBUGP("mphf algorithm: %u \n", mphf->algo);
|
DEBUGP("mphf algorithm: %u \n", mphf->algo);
|
||||||
switch(mphf->algo)
|
switch(mphf->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
return czech_mphf_search(mphf, key, keylen);
|
return cmph_czech_mphf_search(mphf, key, keylen);
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
DEBUGP("bmz algorithm search\n");
|
DEBUGP("bmz algorithm search\n");
|
||||||
return bmz_mphf_search(mphf, key, keylen);
|
return cmph_bmz_mphf_search(mphf, key, keylen);
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
@ -154,20 +154,20 @@ uint32 mphf_search(mphf_t *mphf, const char *key, uint32 keylen)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 mphf_size(mphf_t *mphf)
|
cmph_uint32 cmph_mphf_size(cmph_mphf_t *mphf)
|
||||||
{
|
{
|
||||||
return mphf->size;
|
return mphf->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mphf_destroy(mphf_t *mphf)
|
void cmph_mphf_destroy(cmph_mphf_t *mphf)
|
||||||
{
|
{
|
||||||
switch(mphf->algo)
|
switch(mphf->algo)
|
||||||
{
|
{
|
||||||
case MPH_CZECH:
|
case CMPH_CZECH:
|
||||||
czech_mphf_destroy(mphf);
|
cmph_czech_mphf_destroy(mphf);
|
||||||
return;
|
return;
|
||||||
case MPH_BMZ: /* included -- Fabiano */
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
bmz_mphf_destroy(mphf);
|
cmph_bmz_mphf_destroy(mphf);
|
||||||
return;
|
return;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
|
34
src/cmph.h
34
src/cmph.h
@ -11,32 +11,32 @@ extern "C"
|
|||||||
|
|
||||||
#include "cmph_types.h"
|
#include "cmph_types.h"
|
||||||
|
|
||||||
typedef struct __mph_t mph_t;
|
typedef struct cmph__mph_t cmph_mph_t;
|
||||||
typedef struct __mphf_t mphf_t;
|
typedef struct cmph__mphf_t cmph_mphf_t;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
void *data;
|
void *data;
|
||||||
uint32 nkeys;
|
cmph_uint32 nkeys;
|
||||||
int (*read)(void *, char **, uint32 *);
|
int (*read)(void *, char **, cmph_uint32 *);
|
||||||
void (*dispose)(void *, char *, uint32);
|
void (*dispose)(void *, char *, cmph_uint32);
|
||||||
void (*rewind)(void *);
|
void (*rewind)(void *);
|
||||||
} key_source_t;
|
} cmph_key_source_t;
|
||||||
|
|
||||||
/** Hash generation API **/
|
/** Hash generation API **/
|
||||||
mph_t *mph_new(MPH_ALGO algo, key_source_t *key_source);
|
cmph_mph_t *cmph_mph_new(CMPH_ALGO algo, cmph_key_source_t *key_source);
|
||||||
void mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs);
|
void cmph_mph_set_hashfuncs(cmph_mph_t *mph, CMPH_HASH *hashfuncs);
|
||||||
void mph_set_verbosity(mph_t *mph, uint32 verbosity);
|
void cmph_mph_set_verbosity(cmph_mph_t *mph, cmph_uint32 verbosity);
|
||||||
void mph_set_graphsize(mph_t *mph, float c);
|
void cmph_mph_set_graphsize(cmph_mph_t *mph, float c);
|
||||||
void mph_destroy(mph_t *mph);
|
void cmph_mph_destroy(cmph_mph_t *mph);
|
||||||
mphf_t *mph_create(mph_t *mph);
|
cmph_mphf_t *cmph_mph_create(cmph_mph_t *mph);
|
||||||
|
|
||||||
/** Hash querying API **/
|
/** Hash querying API **/
|
||||||
mphf_t *mphf_load(FILE *f);
|
cmph_mphf_t *cmph_mphf_load(FILE *f);
|
||||||
int mphf_dump(mphf_t *mphf, FILE *f);
|
int cmph_mphf_dump(cmph_mphf_t *mphf, FILE *f);
|
||||||
uint32 mphf_search(mphf_t *mphf, const char *key, uint32 keylen);
|
cmph_uint32 cmph_mphf_search(cmph_mphf_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
uint32 mphf_size(mphf_t *mphf);
|
cmph_uint32 cmph_mphf_size(cmph_mphf_t *mphf);
|
||||||
void mphf_destroy(mphf_t *mphf);
|
void cmph_mphf_destroy(cmph_mphf_t *mphf);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -5,10 +5,10 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
mph_t *__mph_new(MPH_ALGO algo, key_source_t *key_source)
|
cmph_mph_t *cmph__mph_new(CMPH_ALGO algo, cmph_key_source_t *key_source)
|
||||||
{
|
{
|
||||||
mph_t *mph = (mph_t *)malloc(sizeof(mph_t));
|
cmph_mph_t *mph = (cmph_mph_t *)malloc(sizeof(cmph_mph_t));
|
||||||
DEBUGP("Creating mph with algorithm %s\n", mph_names[algo]);
|
DEBUGP("Creating mph with algorithm %s\n", cmph_names[algo]);
|
||||||
if (mph == NULL) return NULL;
|
if (mph == NULL) return NULL;
|
||||||
mph->algo = algo;
|
mph->algo = algo;
|
||||||
mph->key_source = key_source;
|
mph->key_source = key_source;
|
||||||
@ -17,51 +17,51 @@ mph_t *__mph_new(MPH_ALGO algo, key_source_t *key_source)
|
|||||||
return mph;
|
return mph;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __mph_destroy(mph_t *mph)
|
void cmph__mph_destroy(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
free(mph);
|
free(mph);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __mphf_dump(mphf_t *mphf, FILE *fd)
|
void cmph__mphf_dump(cmph_mphf_t *mphf, FILE *fd)
|
||||||
{
|
{
|
||||||
uint32 nsize = htonl(mphf->size);
|
cmph_uint32 nsize = htonl(mphf->size);
|
||||||
fwrite(mph_names[mphf->algo], (uint32)(strlen(mph_names[mphf->algo]) + 1), 1, fd);
|
fwrite(cmph_names[mphf->algo], (cmph_uint32)(strlen(cmph_names[mphf->algo]) + 1), 1, fd);
|
||||||
fwrite(&nsize, sizeof(mphf->size), 1, fd);
|
fwrite(&nsize, sizeof(mphf->size), 1, fd);
|
||||||
}
|
}
|
||||||
mphf_t *__mphf_load(FILE *f)
|
cmph_mphf_t *cmph__mphf_load(FILE *f)
|
||||||
{
|
{
|
||||||
mphf_t *mphf = NULL;
|
cmph_mphf_t *mphf = NULL;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
char algo_name[BUFSIZ];
|
char algo_name[BUFSIZ];
|
||||||
char *ptr = algo_name;
|
char *ptr = algo_name;
|
||||||
MPH_ALGO algo = MPH_COUNT;
|
CMPH_ALGO algo = CMPH_COUNT;
|
||||||
|
|
||||||
DEBUGP("Loading mphf\n");
|
DEBUGP("Loading mphf\n");
|
||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
uint32 c = fread(ptr, 1, 1, f);
|
cmph_uint32 c = fread(ptr, 1, 1, f);
|
||||||
if (c != 1) return NULL;
|
if (c != 1) return NULL;
|
||||||
if (*ptr == 0) break;
|
if (*ptr == 0) break;
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
for(i = 0; i < MPH_COUNT; ++i)
|
for(i = 0; i < CMPH_COUNT; ++i)
|
||||||
{
|
{
|
||||||
if (strcmp(algo_name, mph_names[i]) == 0)
|
if (strcmp(algo_name, cmph_names[i]) == 0)
|
||||||
{
|
{
|
||||||
algo = i;
|
algo = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (algo == MPH_COUNT)
|
if (algo == CMPH_COUNT)
|
||||||
{
|
{
|
||||||
DEBUGP("Algorithm %s not found\n", algo_name);
|
DEBUGP("Algorithm %s not found\n", algo_name);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
mphf = (mphf_t *)malloc(sizeof(mphf_t));
|
mphf = (cmph_mphf_t *)malloc(sizeof(cmph_mphf_t));
|
||||||
mphf->algo = algo;
|
mphf->algo = algo;
|
||||||
fread(&(mphf->size), sizeof(mphf->size), 1, f);
|
fread(&(mphf->size), sizeof(mphf->size), 1, f);
|
||||||
mphf->size = ntohl(mphf->size);
|
mphf->size = ntohl(mphf->size);
|
||||||
mphf->data = NULL;
|
mphf->data = NULL;
|
||||||
DEBUGP("Algorithm is %s and mphf is sized %u\n", mph_names[algo], mphf->size);
|
DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size);
|
||||||
|
|
||||||
return mphf;
|
return mphf;
|
||||||
}
|
}
|
||||||
|
@ -5,29 +5,29 @@
|
|||||||
|
|
||||||
/** Hash generation algorithm data
|
/** Hash generation algorithm data
|
||||||
*/
|
*/
|
||||||
struct __mph_t
|
struct cmph__mph_t
|
||||||
{
|
{
|
||||||
MPH_ALGO algo;
|
CMPH_ALGO algo;
|
||||||
key_source_t *key_source;
|
cmph_key_source_t *key_source;
|
||||||
uint32 verbosity;
|
cmph_uint32 verbosity;
|
||||||
float c;
|
float c;
|
||||||
void *data; //algorithm dependent data
|
void *data; //algorithm dependent data
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Hash querying algorithm data
|
/** Hash querying algorithm data
|
||||||
*/
|
*/
|
||||||
struct __mphf_t
|
struct cmph__mphf_t
|
||||||
{
|
{
|
||||||
MPH_ALGO algo;
|
CMPH_ALGO algo;
|
||||||
uint32 size;
|
cmph_uint32 size;
|
||||||
key_source_t *key_source;
|
cmph_key_source_t *key_source;
|
||||||
void *data; //algorithm dependent data
|
void *data; //algorithm dependent data
|
||||||
};
|
};
|
||||||
|
|
||||||
mph_t *__mph_new(MPH_ALGO algo, key_source_t *key_source);
|
cmph_mph_t *cmph__mph_new(CMPH_ALGO algo, cmph_key_source_t *key_source);
|
||||||
void __mph_destroy();
|
void cmph__mph_destroy();
|
||||||
void __mphf_dump(mphf_t *mphf, FILE *);
|
void cmph__mphf_dump(cmph_mphf_t *mphf, FILE *);
|
||||||
mphf_t *__mphf_load(FILE *f);
|
cmph_mphf_t *cmph__mphf_load(FILE *f);
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,13 +1,14 @@
|
|||||||
#ifndef __CMPH_TYPES_H__
|
#ifndef __CMPH_TYPES_H__
|
||||||
#define __CMPH_TYPES_H__
|
#define __CMPH_TYPES_H__
|
||||||
|
|
||||||
typedef unsigned char uint8;
|
typedef unsigned char cmph_uint8;
|
||||||
typedef unsigned short uint16;
|
typedef unsigned short cmph_uint16;
|
||||||
typedef unsigned int uint32;
|
typedef unsigned int cmph_uint32;
|
||||||
|
|
||||||
typedef enum { HASH_JENKINS, HASH_DJB2, HASH_SDBM, HASH_FNV, HASH_GLIB, HASH_PJW, HASH_COUNT } CMPH_HASH;
|
typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_DJB2, CMPH_HASH_SDBM, CMPH_HASH_FNV,
|
||||||
extern const char *hash_names[];
|
CMPH_HASH_GLIB, CMPH_HASH_PJW, CMPH_HASH_COUNT } CMPH_HASH;
|
||||||
typedef enum { MPH_CZECH, MPH_BMZ, MPH_COUNT } MPH_ALGO; /* included -- Fabiano */
|
extern const char *cmph_hash_names[];
|
||||||
extern const char *mph_names[];
|
typedef enum { CMPH_CZECH, CMPH_BMZ, CMPH_COUNT } CMPH_ALGO; /* included -- Fabiano */
|
||||||
|
extern const char *cmph_names[];
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
184
src/czech.c
184
src/czech.c
@ -18,23 +18,23 @@ static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 <<
|
|||||||
#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
||||||
#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
||||||
|
|
||||||
static int czech_gen_edges(mph_t *mph);
|
static int czech_gen_edges(cmph_mph_t *mph);
|
||||||
static void czech_traverse(czech_mph_data_t *czech, uint8 *visited, uint32 v);
|
static void czech_traverse(cmph_czech_mph_data_t *czech, cmph_uint8 *visited, cmph_uint32 v);
|
||||||
|
|
||||||
mph_t *czech_mph_new(key_source_t *key_source)
|
cmph_mph_t *cmph_czech_mph_new(cmph_key_source_t *key_source)
|
||||||
{
|
{
|
||||||
mph_t *mph = NULL;
|
cmph_mph_t *mph = NULL;
|
||||||
czech_mph_data_t *czech = NULL;
|
cmph_czech_mph_data_t *czech = NULL;
|
||||||
mph = __mph_new(MPH_CZECH, key_source);
|
mph = cmph__mph_new(CMPH_CZECH, key_source);
|
||||||
if (mph == NULL) return NULL;
|
if (mph == NULL) return NULL;
|
||||||
czech = (czech_mph_data_t *)malloc(sizeof(czech_mph_data_t));
|
czech = (cmph_czech_mph_data_t *)malloc(sizeof(cmph_czech_mph_data_t));
|
||||||
if (czech == NULL)
|
if (czech == NULL)
|
||||||
{
|
{
|
||||||
__mph_destroy(mph);
|
cmph__mph_destroy(mph);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
czech->hashfuncs[0] = HASH_JENKINS;
|
czech->hashfuncs[0] = CMPH_HASH_JENKINS;
|
||||||
czech->hashfuncs[1] = HASH_JENKINS;
|
czech->hashfuncs[1] = CMPH_HASH_JENKINS;
|
||||||
czech->g = NULL;
|
czech->g = NULL;
|
||||||
czech->graph = NULL;
|
czech->graph = NULL;
|
||||||
czech->hashes = NULL;
|
czech->hashes = NULL;
|
||||||
@ -42,20 +42,20 @@ mph_t *czech_mph_new(key_source_t *key_source)
|
|||||||
assert(mph->data);
|
assert(mph->data);
|
||||||
return mph;
|
return mph;
|
||||||
}
|
}
|
||||||
void czech_mph_destroy(mph_t *mph)
|
void cmph_czech_mph_destroy(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
czech_mph_data_t *data = (czech_mph_data_t *)mph->data;
|
cmph_czech_mph_data_t *data = (cmph_czech_mph_data_t *)mph->data;
|
||||||
DEBUGP("Destroying algorithm dependent data\n");
|
DEBUGP("Destroying algorithm dependent data\n");
|
||||||
free(data);
|
free(data);
|
||||||
__mph_destroy(mph);
|
cmph__mph_destroy(mph);
|
||||||
}
|
}
|
||||||
|
|
||||||
void czech_mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs)
|
void cmph_czech_mph_set_hashfuncs(cmph_mph_t *mph, CMPH_HASH *hashfuncs)
|
||||||
{
|
{
|
||||||
czech_mph_data_t *czech = (czech_mph_data_t *)mph->data;
|
cmph_czech_mph_data_t *czech = (cmph_czech_mph_data_t *)mph->data;
|
||||||
CMPH_HASH *hashptr = hashfuncs;
|
CMPH_HASH *hashptr = hashfuncs;
|
||||||
uint32 i = 0;
|
cmph_uint32 i = 0;
|
||||||
while(*hashptr != HASH_COUNT)
|
while(*hashptr != CMPH_HASH_COUNT)
|
||||||
{
|
{
|
||||||
if (i >= 2) break; //czech only uses two hash functions
|
if (i >= 2) break; //czech only uses two hash functions
|
||||||
czech->hashfuncs[i] = *hashptr;
|
czech->hashfuncs[i] = *hashptr;
|
||||||
@ -63,22 +63,22 @@ void czech_mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mphf_t *czech_mph_create(mph_t *mph, float c)
|
cmph_mphf_t *cmph_czech_mph_create(cmph_mph_t *mph, float c)
|
||||||
{
|
{
|
||||||
mphf_t *mphf = NULL;
|
cmph_mphf_t *mphf = NULL;
|
||||||
czech_mphf_data_t *czechf = NULL;
|
cmph_czech_mphf_data_t *czechf = NULL;
|
||||||
|
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 iterations = 20;
|
cmph_uint32 iterations = 20;
|
||||||
uint8 *visited = NULL;
|
cmph_uint8 *visited = NULL;
|
||||||
czech_mph_data_t *czech = (czech_mph_data_t *)mph->data;
|
cmph_czech_mph_data_t *czech = (cmph_czech_mph_data_t *)mph->data;
|
||||||
czech->m = mph->key_source->nkeys;
|
czech->m = mph->key_source->nkeys;
|
||||||
czech->n = ceil(c * mph->key_source->nkeys);
|
czech->n = ceil(c * mph->key_source->nkeys);
|
||||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", czech->m, czech->n, c);
|
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", czech->m, czech->n, c);
|
||||||
czech->graph = graph_new(czech->n, czech->m);
|
czech->graph = cmph_graph_new(czech->n, czech->m);
|
||||||
DEBUGP("Created graph\n");
|
DEBUGP("Created graph\n");
|
||||||
|
|
||||||
czech->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
|
czech->hashes = (cmph_hash_state_t **)malloc(sizeof(cmph_hash_state_t *)*3);
|
||||||
for(i = 0; i < 3; ++i) czech->hashes[i] = NULL;
|
for(i = 0; i < 3; ++i) czech->hashes[i] = NULL;
|
||||||
//Mapping step
|
//Mapping step
|
||||||
if (mph->verbosity)
|
if (mph->verbosity)
|
||||||
@ -88,15 +88,15 @@ mphf_t *czech_mph_create(mph_t *mph, float c)
|
|||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
int ok;
|
int ok;
|
||||||
czech->hashes[0] = hash_state_new(czech->hashfuncs[0], czech->n);
|
czech->hashes[0] = cmph_hash_state_new(czech->hashfuncs[0], czech->n);
|
||||||
czech->hashes[1] = hash_state_new(czech->hashfuncs[1], czech->n);
|
czech->hashes[1] = cmph_hash_state_new(czech->hashfuncs[1], czech->n);
|
||||||
ok = czech_gen_edges(mph);
|
ok = czech_gen_edges(mph);
|
||||||
if (!ok)
|
if (!ok)
|
||||||
{
|
{
|
||||||
--iterations;
|
--iterations;
|
||||||
hash_state_destroy(czech->hashes[0]);
|
cmph_hash_state_destroy(czech->hashes[0]);
|
||||||
czech->hashes[0] = NULL;
|
czech->hashes[0] = NULL;
|
||||||
hash_state_destroy(czech->hashes[1]);
|
cmph_hash_state_destroy(czech->hashes[1]);
|
||||||
czech->hashes[1] = NULL;
|
czech->hashes[1] = NULL;
|
||||||
DEBUGP("%u iterations remaining\n", iterations);
|
DEBUGP("%u iterations remaining\n", iterations);
|
||||||
if (mph->verbosity)
|
if (mph->verbosity)
|
||||||
@ -109,7 +109,7 @@ mphf_t *czech_mph_create(mph_t *mph, float c)
|
|||||||
}
|
}
|
||||||
if (iterations == 0)
|
if (iterations == 0)
|
||||||
{
|
{
|
||||||
graph_destroy(czech->graph);
|
cmph_graph_destroy(czech->graph);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,7 +122,7 @@ mphf_t *czech_mph_create(mph_t *mph, float c)
|
|||||||
visited = (char *)malloc(czech->n/8 + 1);
|
visited = (char *)malloc(czech->n/8 + 1);
|
||||||
memset(visited, 0, czech->n/8 + 1);
|
memset(visited, 0, czech->n/8 + 1);
|
||||||
free(czech->g);
|
free(czech->g);
|
||||||
czech->g = malloc(czech->n * sizeof(uint32));
|
czech->g = malloc(czech->n * sizeof(cmph_uint32));
|
||||||
assert(czech->g);
|
assert(czech->g);
|
||||||
for (i = 0; i < czech->n; ++i)
|
for (i = 0; i < czech->n; ++i)
|
||||||
{
|
{
|
||||||
@ -132,13 +132,13 @@ mphf_t *czech_mph_create(mph_t *mph, float c)
|
|||||||
czech_traverse(czech, visited, i);
|
czech_traverse(czech, visited, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
graph_destroy(czech->graph);
|
cmph_graph_destroy(czech->graph);
|
||||||
free(visited);
|
free(visited);
|
||||||
czech->graph = NULL;
|
czech->graph = NULL;
|
||||||
|
|
||||||
mphf = (mphf_t *)malloc(sizeof(mphf_t));
|
mphf = (cmph_mphf_t *)malloc(sizeof(cmph_mphf_t));
|
||||||
mphf->algo = mph->algo;
|
mphf->algo = mph->algo;
|
||||||
czechf = (czech_mphf_data_t *)malloc(sizeof(czech_mph_data_t));
|
czechf = (cmph_czech_mphf_data_t *)malloc(sizeof(cmph_czech_mph_data_t));
|
||||||
czechf->g = czech->g;
|
czechf->g = czech->g;
|
||||||
czech->g = NULL; //transfer memory ownership
|
czech->g = NULL; //transfer memory ownership
|
||||||
czechf->hashes = czech->hashes;
|
czechf->hashes = czech->hashes;
|
||||||
@ -155,43 +155,43 @@ mphf_t *czech_mph_create(mph_t *mph, float c)
|
|||||||
return mphf;
|
return mphf;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void czech_traverse(czech_mph_data_t *czech, uint8 *visited, uint32 v)
|
static void czech_traverse(cmph_czech_mph_data_t *czech, cmph_uint8 *visited, cmph_uint32 v)
|
||||||
{
|
{
|
||||||
|
|
||||||
graph_iterator_t it = graph_neighbors_it(czech->graph, v);
|
cmph_graph_iterator_t it = cmph_graph_neighbors_it(czech->graph, v);
|
||||||
uint32 neighbor = 0;
|
cmph_uint32 neighbor = 0;
|
||||||
SETBIT(visited,v);
|
SETBIT(visited,v);
|
||||||
|
|
||||||
DEBUGP("Visiting vertex %u\n", v);
|
DEBUGP("Visiting vertex %u\n", v);
|
||||||
while((neighbor = graph_next_neighbor(czech->graph, &it)) != GRAPH_NO_NEIGHBOR)
|
while((neighbor = cmph_graph_next_neighbor(czech->graph, &it)) != CMPH_GRAPH_NO_NEIGHBOR)
|
||||||
{
|
{
|
||||||
DEBUGP("Visiting neighbor %u\n", neighbor);
|
DEBUGP("Visiting neighbor %u\n", neighbor);
|
||||||
if(GETBIT(visited,neighbor)) continue;
|
if(GETBIT(visited,neighbor)) continue;
|
||||||
DEBUGP("Visiting neighbor %u\n", neighbor);
|
DEBUGP("Visiting neighbor %u\n", neighbor);
|
||||||
DEBUGP("Visiting edge %u->%u with id %u\n", v, neighbor, graph_edge_id(czech->graph, v, neighbor));
|
DEBUGP("Visiting edge %u->%u with id %u\n", v, neighbor, cmph_graph_edge_id(czech->graph, v, neighbor));
|
||||||
czech->g[neighbor] = graph_edge_id(czech->graph, v, neighbor) - czech->g[v];
|
czech->g[neighbor] = cmph_graph_edge_id(czech->graph, v, neighbor) - czech->g[v];
|
||||||
DEBUGP("g is %u (%u - %u mod %u)\n", czech->g[neighbor], graph_edge_id(czech->graph, v, neighbor), czech->g[v], czech->m);
|
DEBUGP("g is %u (%u - %u mod %u)\n", czech->g[neighbor], cmph_graph_edge_id(czech->graph, v, neighbor), czech->g[v], czech->m);
|
||||||
czech_traverse(czech, visited, neighbor);
|
czech_traverse(czech, visited, neighbor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int czech_gen_edges(mph_t *mph)
|
static int czech_gen_edges(cmph_mph_t *mph)
|
||||||
{
|
{
|
||||||
uint32 e;
|
cmph_uint32 e;
|
||||||
czech_mph_data_t *czech = (czech_mph_data_t *)mph->data;
|
cmph_czech_mph_data_t *czech = (cmph_czech_mph_data_t *)mph->data;
|
||||||
int cycles = 0;
|
int cycles = 0;
|
||||||
|
|
||||||
DEBUGP("Generating edges for %u vertices\n", czech->n);
|
DEBUGP("Generating edges for %u vertices\n", czech->n);
|
||||||
graph_clear_edges(czech->graph);
|
cmph_graph_clear_edges(czech->graph);
|
||||||
mph->key_source->rewind(mph->key_source->data);
|
mph->key_source->rewind(mph->key_source->data);
|
||||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||||
{
|
{
|
||||||
uint32 h1, h2;
|
cmph_uint32 h1, h2;
|
||||||
uint32 keylen;
|
cmph_uint32 keylen;
|
||||||
char *key;
|
char *key;
|
||||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||||
h1 = hash(czech->hashes[0], key, keylen) % czech->n;
|
h1 = cmph_hash(czech->hashes[0], key, keylen) % czech->n;
|
||||||
h2 = hash(czech->hashes[1], key, keylen) % czech->n;
|
h2 = cmph_hash(czech->hashes[1], key, keylen) % czech->n;
|
||||||
if (h1 == h2) if (++h2 >= czech->n) h2 = 0;
|
if (h1 == h2) if (++h2 >= czech->n) h2 = 0;
|
||||||
if (h1 == h2)
|
if (h1 == h2)
|
||||||
{
|
{
|
||||||
@ -201,51 +201,51 @@ static int czech_gen_edges(mph_t *mph)
|
|||||||
}
|
}
|
||||||
DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
|
DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
|
||||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||||
graph_add_edge(czech->graph, h1, h2);
|
cmph_graph_add_edge(czech->graph, h1, h2);
|
||||||
}
|
}
|
||||||
cycles = graph_is_cyclic(czech->graph);
|
cycles = cmph_graph_is_cyclic(czech->graph);
|
||||||
if (mph->verbosity && cycles) fprintf(stderr, "Cyclic graph generated\n");
|
if (mph->verbosity && cycles) fprintf(stderr, "Cyclic graph generated\n");
|
||||||
DEBUGP("Looking for cycles: %u\n", cycles);
|
DEBUGP("Looking for cycles: %u\n", cycles);
|
||||||
|
|
||||||
return ! cycles;
|
return ! cycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
int czech_mphf_dump(mphf_t *mphf, FILE *fd)
|
int cmph_czech_mphf_dump(cmph_mphf_t *mphf, FILE *fd)
|
||||||
{
|
{
|
||||||
char *buf = NULL;
|
char *buf = NULL;
|
||||||
uint32 buflen;
|
cmph_uint32 buflen;
|
||||||
uint32 nbuflen;
|
cmph_uint32 nbuflen;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 two = htonl(2); //number of hash functions
|
cmph_uint32 two = htonl(2); //number of hash functions
|
||||||
czech_mphf_data_t *data = (czech_mphf_data_t *)mphf->data;
|
cmph_czech_mphf_data_t *data = (cmph_czech_mphf_data_t *)mphf->data;
|
||||||
uint32 nn, nm;
|
cmph_uint32 nn, nm;
|
||||||
__mphf_dump(mphf, fd);
|
cmph__mphf_dump(mphf, fd);
|
||||||
|
|
||||||
fwrite(&two, sizeof(uint32), 1, fd);
|
fwrite(&two, sizeof(cmph_uint32), 1, fd);
|
||||||
|
|
||||||
hash_state_dump(data->hashes[0], &buf, &buflen);
|
cmph_hash_state_dump(data->hashes[0], &buf, &buflen);
|
||||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
nbuflen = htonl(buflen);
|
nbuflen = htonl(buflen);
|
||||||
fwrite(&nbuflen, sizeof(uint32), 1, fd);
|
fwrite(&nbuflen, sizeof(cmph_uint32), 1, fd);
|
||||||
fwrite(buf, buflen, 1, fd);
|
fwrite(buf, buflen, 1, fd);
|
||||||
free(buf);
|
free(buf);
|
||||||
|
|
||||||
hash_state_dump(data->hashes[1], &buf, &buflen);
|
cmph_hash_state_dump(data->hashes[1], &buf, &buflen);
|
||||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
nbuflen = htonl(buflen);
|
nbuflen = htonl(buflen);
|
||||||
fwrite(&nbuflen, sizeof(uint32), 1, fd);
|
fwrite(&nbuflen, sizeof(cmph_uint32), 1, fd);
|
||||||
fwrite(buf, buflen, 1, fd);
|
fwrite(buf, buflen, 1, fd);
|
||||||
free(buf);
|
free(buf);
|
||||||
|
|
||||||
nn = htonl(data->n);
|
nn = htonl(data->n);
|
||||||
fwrite(&nn, sizeof(uint32), 1, fd);
|
fwrite(&nn, sizeof(cmph_uint32), 1, fd);
|
||||||
nm = htonl(data->m);
|
nm = htonl(data->m);
|
||||||
fwrite(&nm, sizeof(uint32), 1, fd);
|
fwrite(&nm, sizeof(cmph_uint32), 1, fd);
|
||||||
|
|
||||||
for (i = 0; i < data->n; ++i)
|
for (i = 0; i < data->n; ++i)
|
||||||
{
|
{
|
||||||
uint32 ng = htonl(data->g[i]);
|
cmph_uint32 ng = htonl(data->g[i]);
|
||||||
fwrite(&ng, sizeof(uint32), 1, fd);
|
fwrite(&ng, sizeof(cmph_uint32), 1, fd);
|
||||||
}
|
}
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
fprintf(stderr, "G: ");
|
fprintf(stderr, "G: ");
|
||||||
@ -255,44 +255,44 @@ int czech_mphf_dump(mphf_t *mphf, FILE *fd)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void czech_mphf_load(FILE *f, mphf_t *mphf)
|
void cmph_czech_mphf_load(FILE *f, cmph_mphf_t *mphf)
|
||||||
{
|
{
|
||||||
uint32 nhashes;
|
cmph_uint32 nhashes;
|
||||||
char fbuf[BUFSIZ];
|
char fbuf[BUFSIZ];
|
||||||
char *buf = NULL;
|
char *buf = NULL;
|
||||||
uint32 buflen;
|
cmph_uint32 buflen;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
hash_state_t *state;
|
cmph_hash_state_t *state;
|
||||||
czech_mphf_data_t *czech = (czech_mphf_data_t *)malloc(sizeof(czech_mphf_data_t));
|
cmph_czech_mphf_data_t *czech = (cmph_czech_mphf_data_t *)malloc(sizeof(cmph_czech_mphf_data_t));
|
||||||
|
|
||||||
DEBUGP("Loading czech mphf\n");
|
DEBUGP("Loading czech mphf\n");
|
||||||
mphf->data = czech;
|
mphf->data = czech;
|
||||||
fread(&nhashes, sizeof(uint32), 1, f);
|
fread(&nhashes, sizeof(cmph_uint32), 1, f);
|
||||||
nhashes = ntohl(nhashes);
|
nhashes = ntohl(nhashes);
|
||||||
czech->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
|
czech->hashes = (cmph_hash_state_t **)malloc(sizeof(cmph_hash_state_t *)*(nhashes + 1));
|
||||||
czech->hashes[nhashes] = NULL;
|
czech->hashes[nhashes] = NULL;
|
||||||
DEBUGP("Reading %u hashes\n", nhashes);
|
DEBUGP("Reading %u hashes\n", nhashes);
|
||||||
for (i = 0; i < nhashes; ++i)
|
for (i = 0; i < nhashes; ++i)
|
||||||
{
|
{
|
||||||
hash_state_t *state = NULL;
|
cmph_hash_state_t *state = NULL;
|
||||||
fread(&buflen, sizeof(uint32), 1, f);
|
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
||||||
buflen = ntohl(buflen);
|
buflen = ntohl(buflen);
|
||||||
DEBUGP("Hash state has %u bytes\n", buflen);
|
DEBUGP("Hash state has %u bytes\n", buflen);
|
||||||
buf = (char *)malloc(buflen);
|
buf = (char *)malloc(buflen);
|
||||||
fread(buf, buflen, 1, f);
|
fread(buf, buflen, 1, f);
|
||||||
state = hash_state_load(buf, buflen);
|
state = cmph_hash_state_load(buf, buflen);
|
||||||
czech->hashes[i] = state;
|
czech->hashes[i] = state;
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUGP("Reading m and n\n");
|
DEBUGP("Reading m and n\n");
|
||||||
fread(&(czech->n), sizeof(uint32), 1, f);
|
fread(&(czech->n), sizeof(cmph_uint32), 1, f);
|
||||||
czech->n = ntohl(czech->n);
|
czech->n = ntohl(czech->n);
|
||||||
fread(&(czech->m), sizeof(uint32), 1, f);
|
fread(&(czech->m), sizeof(cmph_uint32), 1, f);
|
||||||
czech->m = ntohl(czech->m);
|
czech->m = ntohl(czech->m);
|
||||||
|
|
||||||
czech->g = (uint32 *)malloc(sizeof(uint32)*czech->n);
|
czech->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*czech->n);
|
||||||
fread(czech->g, czech->n*sizeof(uint32), 1, f);
|
fread(czech->g, czech->n*sizeof(cmph_uint32), 1, f);
|
||||||
for (i = 0; i < czech->n; ++i) czech->g[i] = ntohl(czech->g[i]);
|
for (i = 0; i < czech->n; ++i) czech->g[i] = ntohl(czech->g[i]);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
fprintf(stderr, "G: ");
|
fprintf(stderr, "G: ");
|
||||||
@ -303,22 +303,22 @@ void czech_mphf_load(FILE *f, mphf_t *mphf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32 czech_mphf_search(mphf_t *mphf, const char *key, uint32 keylen)
|
cmph_uint32 cmph_czech_mphf_search(cmph_mphf_t *mphf, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
czech_mphf_data_t *czech = mphf->data;
|
cmph_czech_mphf_data_t *czech = mphf->data;
|
||||||
uint32 h1 = hash(czech->hashes[0], key, keylen) % czech->n;
|
cmph_uint32 h1 = cmph_hash(czech->hashes[0], key, keylen) % czech->n;
|
||||||
uint32 h2 = hash(czech->hashes[1], key, keylen) % czech->n;
|
cmph_uint32 h2 = cmph_hash(czech->hashes[1], key, keylen) % czech->n;
|
||||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
if (h1 == h2 && ++h2 > czech->n) h2 = 0;
|
if (h1 == h2 && ++h2 > czech->n) h2 = 0;
|
||||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, czech->g[h1], czech->g[h2], czech->m);
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, czech->g[h1], czech->g[h2], czech->m);
|
||||||
return (czech->g[h1] + czech->g[h2]) % czech->m;
|
return (czech->g[h1] + czech->g[h2]) % czech->m;
|
||||||
}
|
}
|
||||||
void czech_mphf_destroy(mphf_t *mphf)
|
void cmph_czech_mphf_destroy(cmph_mphf_t *mphf)
|
||||||
{
|
{
|
||||||
czech_mphf_data_t *data = (czech_mphf_data_t *)mphf->data;
|
cmph_czech_mphf_data_t *data = (cmph_czech_mphf_data_t *)mphf->data;
|
||||||
free(data->g);
|
free(data->g);
|
||||||
hash_state_destroy(data->hashes[0]);
|
cmph_hash_state_destroy(data->hashes[0]);
|
||||||
hash_state_destroy(data->hashes[1]);
|
cmph_hash_state_destroy(data->hashes[1]);
|
||||||
free(data->hashes);
|
free(data->hashes);
|
||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
|
20
src/czech.h
20
src/czech.h
@ -4,16 +4,16 @@
|
|||||||
#include "graph.h"
|
#include "graph.h"
|
||||||
#include "cmph.h"
|
#include "cmph.h"
|
||||||
|
|
||||||
typedef struct __czech_mphf_data_t czech_mphf_data_t;
|
typedef struct cmph__czech_mphf_data_t cmph_czech_mphf_data_t;
|
||||||
typedef struct __czech_mph_data_t czech_mph_data_t;
|
typedef struct cmph__czech_mph_data_t cmph_czech_mph_data_t;
|
||||||
|
|
||||||
mph_t *czech_mph_new(key_source_t *key_source);
|
cmph_mph_t *cmph_czech_mph_new(cmph_key_source_t *key_source);
|
||||||
void czech_mph_set_hashfuncs(mph_t *mph, CMPH_HASH *hashfuncs);
|
void cmph_czech_mph_set_hashfuncs(cmph_mph_t *mph, CMPH_HASH *hashfuncs);
|
||||||
void czech_mph_destroy(mph_t *mph);
|
void cmph_czech_mph_destroy(cmph_mph_t *mph);
|
||||||
mphf_t *czech_mph_create(mph_t *mph, float c);
|
cmph_mphf_t *cmph_czech_mph_create(cmph_mph_t *mph, float c);
|
||||||
|
|
||||||
void czech_mphf_load(FILE *f, mphf_t *mphf);
|
void cmph_czech_mphf_load(FILE *f, cmph_mphf_t *mphf);
|
||||||
int czech_mphf_dump(mphf_t *mphf, FILE *f);
|
int cmph_czech_mphf_dump(cmph_mphf_t *mphf, FILE *f);
|
||||||
void czech_mphf_destroy(mphf_t *mphf);
|
void cmph_czech_mphf_destroy(cmph_mphf_t *mphf);
|
||||||
uint32 czech_mphf_search(mphf_t *mphf, const char *key, uint32 keylen);
|
cmph_uint32 cmph_czech_mphf_search(cmph_mphf_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,22 +3,22 @@
|
|||||||
|
|
||||||
#include "hash_state.h"
|
#include "hash_state.h"
|
||||||
|
|
||||||
struct __czech_mphf_data_t
|
struct cmph__czech_mphf_data_t
|
||||||
{
|
{
|
||||||
uint32 m; //edges (words) count
|
cmph_uint32 m; //edges (words) count
|
||||||
uint32 n; //vertex count
|
cmph_uint32 n; //vertex count
|
||||||
uint32 *g;
|
cmph_uint32 *g;
|
||||||
hash_state_t **hashes;
|
cmph_hash_state_t **hashes;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __czech_mph_data_t
|
struct cmph__czech_mph_data_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfuncs[2];
|
CMPH_HASH hashfuncs[2];
|
||||||
uint32 m; //edges (words) count
|
cmph_uint32 m; //edges (words) count
|
||||||
uint32 n; //vertex count
|
cmph_uint32 n; //vertex count
|
||||||
graph_t *graph;
|
cmph_graph_t *graph;
|
||||||
uint32 *g;
|
cmph_uint32 *g;
|
||||||
hash_state_t **hashes;
|
cmph_hash_state_t **hashes;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,23 +1,23 @@
|
|||||||
#include "djb2_hash.h"
|
#include "djb2_hash.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
djb2_state_t *djb2_state_new()
|
cmph_djb2_state_t *cmph_djb2_state_new()
|
||||||
{
|
{
|
||||||
djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
|
cmph_djb2_state_t *state = (cmph_djb2_state_t *)malloc(sizeof(cmph_djb2_state_t));
|
||||||
state->hashfunc = HASH_DJB2;
|
state->hashfunc = CMPH_HASH_DJB2;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void djb2_state_destroy(djb2_state_t *state)
|
void cmph_djb2_state_destroy(cmph_djb2_state_t *state)
|
||||||
{
|
{
|
||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 djb2_hash(djb2_state_t *state, const char *k, uint32 keylen)
|
cmph_uint32 cmph_djb2_hash(cmph_djb2_state_t *state, const char *k, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
register uint32 hash = 5381;
|
register cmph_uint32 hash = 5381;
|
||||||
const unsigned char *ptr = k;
|
const unsigned char *ptr = k;
|
||||||
uint32 i = 0;
|
cmph_uint32 i = 0;
|
||||||
while (i < keylen)
|
while (i < keylen)
|
||||||
{
|
{
|
||||||
hash = hash*33 ^ *ptr;
|
hash = hash*33 ^ *ptr;
|
||||||
@ -27,16 +27,16 @@ uint32 djb2_hash(djb2_state_t *state, const char *k, uint32 keylen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void djb2_state_dump(djb2_state_t *state, char **buf, uint32 *buflen)
|
void cmph_djb2_state_dump(cmph_djb2_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
*buf = NULL;
|
*buf = NULL;
|
||||||
*buflen = 0;
|
*buflen = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
djb2_state_t *djb2_state_load(const char *buf, uint32 buflen)
|
cmph_djb2_state_t *cmph_djb2_state_load(const char *buf, cmph_uint32 buflen)
|
||||||
{
|
{
|
||||||
djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
|
cmph_djb2_state_t *state = (cmph_djb2_state_t *)malloc(sizeof(cmph_djb2_state_t));
|
||||||
state->hashfunc = HASH_DJB2;
|
state->hashfunc = CMPH_HASH_DJB2;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
@ -3,15 +3,15 @@
|
|||||||
|
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
typedef struct __djb2_state_t
|
typedef struct cmph__djb2_state_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfunc;
|
CMPH_HASH hashfunc;
|
||||||
} djb2_state_t;
|
} cmph_djb2_state_t;
|
||||||
|
|
||||||
djb2_state_t *djb2_state_new();
|
cmph_djb2_state_t *cmph_djb2_state_new();
|
||||||
uint32 djb2_hash(djb2_state_t *state, const char *k, uint32 keylen);
|
cmph_uint32 cmph_djb2_hash(cmph_djb2_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
void djb2_state_dump(djb2_state_t *state, char **buf, uint32 *buflen);
|
void cmph_djb2_state_dump(cmph_djb2_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
djb2_state_t *djb2_state_load(const char *buf, uint32 buflen);
|
cmph_djb2_state_t *cmph_djb2_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
void djb2_state_destroy(djb2_state_t *state);
|
void cmph_djb2_state_destroy(cmph_djb2_state_t *state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,19 +1,19 @@
|
|||||||
#include "fnv_hash.h"
|
#include "fnv_hash.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
fnv_state_t *fnv_state_new()
|
cmph_fnv_state_t *cmph_fnv_state_new()
|
||||||
{
|
{
|
||||||
fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
|
cmph_fnv_state_t *state = (cmph_fnv_state_t *)malloc(sizeof(cmph_fnv_state_t));
|
||||||
state->hashfunc = HASH_FNV;
|
state->hashfunc = CMPH_HASH_FNV;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void fnv_state_destroy(fnv_state_t *state)
|
void cmph_fnv_state_destroy(cmph_fnv_state_t *state)
|
||||||
{
|
{
|
||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 fnv_hash(fnv_state_t *state, const char *k, uint32 keylen)
|
cmph_uint32 cmph_fnv_hash(cmph_fnv_state_t *state, const char *k, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
const unsigned char *bp = (const unsigned char *)k;
|
const unsigned char *bp = (const unsigned char *)k;
|
||||||
const unsigned char *be = bp + keylen;
|
const unsigned char *be = bp + keylen;
|
||||||
@ -31,16 +31,16 @@ uint32 fnv_hash(fnv_state_t *state, const char *k, uint32 keylen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void fnv_state_dump(fnv_state_t *state, char **buf, uint32 *buflen)
|
void cmph_fnv_state_dump(cmph_fnv_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
*buf = NULL;
|
*buf = NULL;
|
||||||
*buflen = 0;
|
*buflen = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
fnv_state_t *fnv_state_load(const char *buf, uint32 buflen)
|
cmph_fnv_state_t *cmph_fnv_state_load(const char *buf, cmph_uint32 buflen)
|
||||||
{
|
{
|
||||||
fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
|
cmph_fnv_state_t *state = (cmph_fnv_state_t *)malloc(sizeof(cmph_fnv_state_t));
|
||||||
state->hashfunc = HASH_FNV;
|
state->hashfunc = CMPH_HASH_FNV;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
@ -3,15 +3,15 @@
|
|||||||
|
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
typedef struct __fnv_state_t
|
typedef struct cmph__fnv_state_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfunc;
|
CMPH_HASH hashfunc;
|
||||||
} fnv_state_t;
|
} cmph_fnv_state_t;
|
||||||
|
|
||||||
fnv_state_t *fnv_state_new();
|
cmph_fnv_state_t *cmph_fnv_state_new();
|
||||||
uint32 fnv_hash(fnv_state_t *state, const char *k, uint32 keylen);
|
cmph_uint32 cmph_fnv_hash(cmph_fnv_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
void fnv_state_dump(fnv_state_t *state, char **buf, uint32 *buflen);
|
void cmph_fnv_state_dump(cmph_fnv_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
fnv_state_t *fnv_state_load(const char *buf, uint32 buflen);
|
cmph_fnv_state_t *cmph_fnv_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
void fnv_state_destroy(fnv_state_t *state);
|
void cmph_fnv_state_destroy(cmph_fnv_state_t *state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
108
src/graph.c
108
src/graph.c
@ -10,47 +10,47 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
static const uint8 bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
static const cmph_uint8 bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
|
||||||
#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8])
|
#define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8])
|
||||||
#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
#define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8])
|
||||||
#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
#define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8])))
|
||||||
|
|
||||||
#define abs_edge(e, i) (e % g->nedges + i * g->nedges)
|
#define abs_edge(e, i) (e % g->nedges + i * g->nedges)
|
||||||
|
|
||||||
struct __graph_t
|
struct cmph__graph_t
|
||||||
{
|
{
|
||||||
uint32 nnodes;
|
cmph_uint32 nnodes;
|
||||||
uint32 nedges;
|
cmph_uint32 nedges;
|
||||||
uint32 *edges;
|
cmph_uint32 *edges;
|
||||||
uint32 *first;
|
cmph_uint32 *first;
|
||||||
uint32 *next;
|
cmph_uint32 *next;
|
||||||
uint8 *critical_nodes; /* included -- Fabiano*/
|
cmph_uint8 *critical_nodes; /* included -- Fabiano*/
|
||||||
uint32 ncritical_nodes; /* included -- Fabiano*/
|
cmph_uint32 ncritical_nodes; /* included -- Fabiano*/
|
||||||
uint32 cedges;
|
cmph_uint32 cedges;
|
||||||
int shrinking;
|
int shrinking;
|
||||||
};
|
};
|
||||||
|
|
||||||
static uint32 EMPTY = UINT_MAX;
|
static cmph_uint32 EMPTY = UINT_MAX;
|
||||||
|
|
||||||
graph_t *graph_new(uint32 nnodes, uint32 nedges)
|
cmph_graph_t *cmph_graph_new(cmph_uint32 nnodes, cmph_uint32 nedges)
|
||||||
{
|
{
|
||||||
graph_t *graph = (graph_t *)malloc(sizeof(graph_t));
|
cmph_graph_t *graph = (cmph_graph_t *)malloc(sizeof(cmph_graph_t));
|
||||||
if (!graph) return NULL;
|
if (!graph) return NULL;
|
||||||
|
|
||||||
graph->edges = (uint32 *)malloc(sizeof(uint32) * 2 * nedges);
|
graph->edges = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * 2 * nedges);
|
||||||
graph->next = (uint32 *)malloc(sizeof(uint32) * 2 * nedges);
|
graph->next = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * 2 * nedges);
|
||||||
graph->first = (uint32 *)malloc(sizeof(uint32) * nnodes);
|
graph->first = (cmph_uint32 *)malloc(sizeof(cmph_uint32) * nnodes);
|
||||||
graph->critical_nodes = NULL; /* included -- Fabiano*/
|
graph->critical_nodes = NULL; /* included -- Fabiano*/
|
||||||
graph->ncritical_nodes = 0; /* included -- Fabiano*/
|
graph->ncritical_nodes = 0; /* included -- Fabiano*/
|
||||||
graph->nnodes = nnodes;
|
graph->nnodes = nnodes;
|
||||||
graph->nedges = nedges;
|
graph->nedges = nedges;
|
||||||
|
|
||||||
graph_clear_edges(graph);
|
cmph_graph_clear_edges(graph);
|
||||||
return graph;
|
return graph;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void graph_destroy(graph_t *graph)
|
void cmph_graph_destroy(cmph_graph_t *graph)
|
||||||
{
|
{
|
||||||
DEBUGP("Destroying graph\n");
|
DEBUGP("Destroying graph\n");
|
||||||
free(graph->edges);
|
free(graph->edges);
|
||||||
@ -61,9 +61,9 @@ void graph_destroy(graph_t *graph)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void graph_print(graph_t *g)
|
void cmph_graph_print(cmph_graph_t *g)
|
||||||
{
|
{
|
||||||
uint32 i, e;
|
cmph_uint32 i, e;
|
||||||
for (i = 0; i < g->nnodes; ++i)
|
for (i = 0; i < g->nnodes; ++i)
|
||||||
{
|
{
|
||||||
DEBUGP("Printing edges connected to %u\n", i);
|
DEBUGP("Printing edges connected to %u\n", i);
|
||||||
@ -81,9 +81,9 @@ void graph_print(graph_t *g)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void graph_add_edge(graph_t *g, uint32 v1, uint32 v2)
|
void cmph_graph_add_edge(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||||
{
|
{
|
||||||
uint32 e = g->cedges;
|
cmph_uint32 e = g->cedges;
|
||||||
|
|
||||||
assert(v1 < g->nnodes);
|
assert(v1 < g->nnodes);
|
||||||
assert(v2 < g->nnodes);
|
assert(v2 < g->nnodes);
|
||||||
@ -101,7 +101,7 @@ void graph_add_edge(graph_t *g, uint32 v1, uint32 v2)
|
|||||||
++(g->cedges);
|
++(g->cedges);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_edge(graph_t *g, uint32 e, uint32 v1, uint32 v2)
|
static int check_edge(cmph_graph_t *g, cmph_uint32 e, cmph_uint32 v1, cmph_uint32 v2)
|
||||||
{
|
{
|
||||||
DEBUGP("Checking edge %u %u looking for %u %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)], v1, v2);
|
DEBUGP("Checking edge %u %u looking for %u %u\n", g->edges[abs_edge(e, 0)], g->edges[abs_edge(e, 1)], v1, v2);
|
||||||
if (g->edges[abs_edge(e, 0)] == v1 && g->edges[abs_edge(e, 1)] == v2) return 1;
|
if (g->edges[abs_edge(e, 0)] == v1 && g->edges[abs_edge(e, 1)] == v2) return 1;
|
||||||
@ -109,9 +109,9 @@ static int check_edge(graph_t *g, uint32 e, uint32 v1, uint32 v2)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 graph_edge_id(graph_t *g, uint32 v1, uint32 v2)
|
cmph_uint32 cmph_graph_edge_id(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||||
{
|
{
|
||||||
uint32 e;
|
cmph_uint32 e;
|
||||||
e = g->first[v1];
|
e = g->first[v1];
|
||||||
assert(e != EMPTY);
|
assert(e != EMPTY);
|
||||||
if (check_edge(g, e, v1, v2)) return abs_edge(e, 0);
|
if (check_edge(g, e, v1, v2)) return abs_edge(e, 0);
|
||||||
@ -123,9 +123,9 @@ uint32 graph_edge_id(graph_t *g, uint32 v1, uint32 v2)
|
|||||||
while (!check_edge(g, e, v1, v2));
|
while (!check_edge(g, e, v1, v2));
|
||||||
return abs_edge(e, 0);
|
return abs_edge(e, 0);
|
||||||
}
|
}
|
||||||
static void del_edge_point(graph_t *g, uint32 v1, uint32 v2)
|
static void del_edge_point(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||||
{
|
{
|
||||||
uint32 e, prev;
|
cmph_uint32 e, prev;
|
||||||
|
|
||||||
DEBUGP("Deleting edge point %u %u\n", v1, v2);
|
DEBUGP("Deleting edge point %u %u\n", v1, v2);
|
||||||
e = g->first[v1];
|
e = g->first[v1];
|
||||||
@ -151,16 +151,16 @@ static void del_edge_point(graph_t *g, uint32 v1, uint32 v2)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void graph_del_edge(graph_t *g, uint32 v1, uint32 v2)
|
void cmph_graph_del_edge(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2)
|
||||||
{
|
{
|
||||||
g->shrinking = 1;
|
g->shrinking = 1;
|
||||||
del_edge_point(g, v1, v2);
|
del_edge_point(g, v1, v2);
|
||||||
del_edge_point(g, v2, v1);
|
del_edge_point(g, v2, v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void graph_clear_edges(graph_t *g)
|
void cmph_graph_clear_edges(cmph_graph_t *g)
|
||||||
{
|
{
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY;
|
for (i = 0; i < g->nnodes; ++i) g->first[i] = EMPTY;
|
||||||
for (i = 0; i < g->nedges*2; ++i)
|
for (i = 0; i < g->nedges*2; ++i)
|
||||||
{
|
{
|
||||||
@ -171,9 +171,9 @@ void graph_clear_edges(graph_t *g)
|
|||||||
g->shrinking = 0;
|
g->shrinking = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int find_degree1_edge(graph_t *g, uint32 v, char *deleted, uint32 *e)
|
static int find_degree1_edge(cmph_graph_t *g, cmph_uint32 v, char *deleted, cmph_uint32 *e)
|
||||||
{
|
{
|
||||||
uint32 edge = g->first[v];
|
cmph_uint32 edge = g->first[v];
|
||||||
char found = 0;
|
char found = 0;
|
||||||
DEBUGP("Checking degree of vertex %u\n", v);
|
DEBUGP("Checking degree of vertex %u\n", v);
|
||||||
if (edge == EMPTY) return 0;
|
if (edge == EMPTY) return 0;
|
||||||
@ -195,13 +195,13 @@ static int find_degree1_edge(graph_t *g, uint32 v, char *deleted, uint32 *e)
|
|||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cyclic_del_edge(graph_t *g, uint32 v, char *deleted)
|
static void cyclic_del_edge(cmph_graph_t *g, cmph_uint32 v, char *deleted)
|
||||||
{
|
{
|
||||||
|
|
||||||
uint32 e;
|
cmph_uint32 e;
|
||||||
char degree1;
|
char degree1;
|
||||||
uint32 v1 = v;
|
cmph_uint32 v1 = v;
|
||||||
uint32 v2 = 0;
|
cmph_uint32 v2 = 0;
|
||||||
|
|
||||||
degree1 = find_degree1_edge(g, v1, deleted, &e);
|
degree1 = find_degree1_edge(g, v1, deleted, &e);
|
||||||
if (!degree1) return;
|
if (!degree1) return;
|
||||||
@ -224,10 +224,10 @@ static void cyclic_del_edge(graph_t *g, uint32 v, char *deleted)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int graph_is_cyclic(graph_t *g)
|
int cmph_graph_is_cyclic(cmph_graph_t *g)
|
||||||
{
|
{
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 v;
|
cmph_uint32 v;
|
||||||
char *deleted = (char *)malloc((g->nedges*sizeof(char))/8 + 1);
|
char *deleted = (char *)malloc((g->nedges*sizeof(char))/8 + 1);
|
||||||
memset(deleted, 0, g->nedges/8 + 1);
|
memset(deleted, 0, g->nedges/8 + 1);
|
||||||
|
|
||||||
@ -249,21 +249,21 @@ int graph_is_cyclic(graph_t *g)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8 graph_node_is_critical(graph_t * g, uint32 v) /* included -- Fabiano */
|
cmph_uint8 cmph_graph_node_is_critical(cmph_graph_t * g, cmph_uint32 v) /* included -- Fabiano */
|
||||||
{
|
{
|
||||||
return GETBIT(g->critical_nodes,v);
|
return GETBIT(g->critical_nodes,v);
|
||||||
}
|
}
|
||||||
|
|
||||||
void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
|
void cmph_graph_obtain_critical_nodes(cmph_graph_t *g) /* included -- Fabiano*/
|
||||||
{
|
{
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 v;
|
cmph_uint32 v;
|
||||||
char *deleted = (char *)malloc((g->nedges*sizeof(char))/8+1);
|
char *deleted = (char *)malloc((g->nedges*sizeof(char))/8+1);
|
||||||
memset(deleted, 0, g->nedges/8 + 1);
|
memset(deleted, 0, g->nedges/8 + 1);
|
||||||
free(g->critical_nodes);
|
free(g->critical_nodes);
|
||||||
g->critical_nodes = (uint8 *)malloc((g->nnodes*sizeof(uint8))/8 + 1);
|
g->critical_nodes = (cmph_uint8 *)malloc((g->nnodes*sizeof(cmph_uint8))/8 + 1);
|
||||||
g->ncritical_nodes = 0;
|
g->ncritical_nodes = 0;
|
||||||
memset(g->critical_nodes, 0, (g->nnodes*sizeof(uint8))/8 + 1);
|
memset(g->critical_nodes, 0, (g->nnodes*sizeof(cmph_uint8))/8 + 1);
|
||||||
DEBUGP("Looking for the 2-core in graph with %u vertices and %u edges\n", g->nnodes, g->nedges);
|
DEBUGP("Looking for the 2-core in graph with %u vertices and %u edges\n", g->nnodes, g->nedges);
|
||||||
for (v = 0; v < g->nnodes; ++v)
|
for (v = 0; v < g->nnodes; ++v)
|
||||||
{
|
{
|
||||||
@ -290,9 +290,9 @@ void graph_obtain_critical_nodes(graph_t *g) /* included -- Fabiano*/
|
|||||||
free(deleted);
|
free(deleted);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8 graph_contains_edge(graph_t *g, uint32 v1, uint32 v2) /* included -- Fabiano*/
|
cmph_uint8 cmph_graph_contains_edge(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2) /* included -- Fabiano*/
|
||||||
{
|
{
|
||||||
uint32 e;
|
cmph_uint32 e;
|
||||||
e = g->first[v1];
|
e = g->first[v1];
|
||||||
if(e == EMPTY) return 0;
|
if(e == EMPTY) return 0;
|
||||||
if (check_edge(g, e, v1, v2)) return 1;
|
if (check_edge(g, e, v1, v2)) return 1;
|
||||||
@ -305,27 +305,27 @@ uint8 graph_contains_edge(graph_t *g, uint32 v1, uint32 v2) /* included -- Fabia
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 graph_vertex_id(graph_t *g, uint32 e, uint32 id) /* included -- Fabiano*/
|
cmph_uint32 cmph_graph_vertex_id(cmph_graph_t *g, cmph_uint32 e, cmph_uint32 id) /* included -- Fabiano*/
|
||||||
{
|
{
|
||||||
return (g->edges[e + id*g->nedges]);
|
return (g->edges[e + id*g->nedges]);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 graph_ncritical_nodes(graph_t *g) /* included -- Fabiano*/
|
cmph_uint32 cmph_graph_ncritical_nodes(cmph_graph_t *g) /* included -- Fabiano*/
|
||||||
{
|
{
|
||||||
return g->ncritical_nodes;
|
return g->ncritical_nodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
graph_iterator_t graph_neighbors_it(graph_t *g, uint32 v)
|
cmph_graph_iterator_t cmph_graph_neighbors_it(cmph_graph_t *g, cmph_uint32 v)
|
||||||
{
|
{
|
||||||
graph_iterator_t it;
|
cmph_graph_iterator_t it;
|
||||||
it.vertex = v;
|
it.vertex = v;
|
||||||
it.edge = g->first[v];
|
it.edge = g->first[v];
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
|
cmph_uint32 cmph_graph_next_neighbor(cmph_graph_t *g, cmph_graph_iterator_t* it)
|
||||||
{
|
{
|
||||||
uint32 ret;
|
cmph_uint32 ret;
|
||||||
if(it->edge == EMPTY) return GRAPH_NO_NEIGHBOR;
|
if(it->edge == EMPTY) return CMPH_GRAPH_NO_NEIGHBOR;
|
||||||
if (g->edges[it->edge] == it->vertex) ret = g->edges[it->edge + g->nedges];
|
if (g->edges[it->edge] == it->vertex) ret = g->edges[it->edge + g->nedges];
|
||||||
else ret = g->edges[it->edge];
|
else ret = g->edges[it->edge];
|
||||||
it->edge = g->next[it->edge];
|
it->edge = g->next[it->edge];
|
||||||
|
42
src/graph.h
42
src/graph.h
@ -4,37 +4,37 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include "cmph_types.h"
|
#include "cmph_types.h"
|
||||||
|
|
||||||
#define GRAPH_NO_NEIGHBOR UINT_MAX
|
#define CMPH_GRAPH_NO_NEIGHBOR UINT_MAX
|
||||||
|
|
||||||
typedef struct __graph_t graph_t;
|
typedef struct cmph__graph_t cmph_graph_t;
|
||||||
typedef struct __graph_iterator_t graph_iterator_t;
|
typedef struct cmph__graph_iterator_t cmph_graph_iterator_t;
|
||||||
struct __graph_iterator_t
|
struct cmph__graph_iterator_t
|
||||||
{
|
{
|
||||||
uint32 vertex;
|
cmph_uint32 vertex;
|
||||||
uint32 edge;
|
cmph_uint32 edge;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
graph_t *graph_new(uint32 nnodes, uint32 nedges);
|
cmph_graph_t *cmph_graph_new(cmph_uint32 nnodes, cmph_uint32 nedges);
|
||||||
void graph_destroy(graph_t *graph);
|
void cmph_graph_destroy(cmph_graph_t *graph);
|
||||||
|
|
||||||
void graph_add_edge(graph_t *g, uint32 v1, uint32 v2);
|
void cmph_graph_add_edge(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
|
||||||
void graph_del_edge(graph_t *g, uint32 v1, uint32 v2);
|
void cmph_graph_del_edge(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
|
||||||
void graph_clear_edges(graph_t *g);
|
void cmph_graph_clear_edges(cmph_graph_t *g);
|
||||||
uint32 graph_edge_id(graph_t *g, uint32 v1, uint32 v2);
|
cmph_uint32 cmph_graph_edge_id(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
|
||||||
uint8 graph_contains_edge(graph_t *g, uint32 v1, uint32 v2);
|
cmph_uint8 cmph_graph_contains_edge(cmph_graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
|
||||||
|
|
||||||
graph_iterator_t graph_neighbors_it(graph_t *g, uint32 v);
|
cmph_graph_iterator_t cmph_graph_neighbors_it(cmph_graph_t *g, cmph_uint32 v);
|
||||||
uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it);
|
cmph_uint32 cmph_graph_next_neighbor(cmph_graph_t *g, cmph_graph_iterator_t* it);
|
||||||
|
|
||||||
void graph_obtain_critical_nodes(graph_t *g); /* included -- Fabiano*/
|
void cmph_graph_obtain_critical_nodes(cmph_graph_t *g); /* included -- Fabiano*/
|
||||||
uint8 graph_node_is_critical(graph_t * g, uint32 v); /* included -- Fabiano */
|
cmph_uint8 cmph_graph_node_is_critical(cmph_graph_t * g, cmph_uint32 v); /* included -- Fabiano */
|
||||||
uint32 graph_ncritical_nodes(graph_t *g); /* included -- Fabiano*/
|
cmph_uint32 cmph_graph_ncritical_nodes(cmph_graph_t *g); /* included -- Fabiano*/
|
||||||
uint32 graph_vertex_id(graph_t *g, uint32 e, uint32 id); /* included -- Fabiano*/
|
cmph_uint32 cmph_graph_vertex_id(cmph_graph_t *g, cmph_uint32 e, cmph_uint32 id); /* included -- Fabiano*/
|
||||||
|
|
||||||
int graph_is_cyclic(graph_t *g);
|
int cmph_graph_is_cyclic(cmph_graph_t *g);
|
||||||
|
|
||||||
void graph_print(graph_t *);
|
void cmph_graph_print(cmph_graph_t *);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
118
src/hash.c
118
src/hash.c
@ -7,26 +7,26 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
const char *hash_names[] = { "jenkins", "djb2", "sdbm", "fnv", "glib", "pjw", NULL };
|
const char *cmph_hash_names[] = { "jenkins", "djb2", "sdbm", "fnv", "glib", "pjw", NULL };
|
||||||
|
|
||||||
hash_state_t *hash_state_new(CMPH_HASH hashfunc, uint32 hashsize)
|
cmph_hash_state_t *cmph_hash_state_new(CMPH_HASH hashfunc, cmph_uint32 hashsize)
|
||||||
{
|
{
|
||||||
hash_state_t *state = NULL;
|
cmph_hash_state_t *state = NULL;
|
||||||
switch (hashfunc)
|
switch (hashfunc)
|
||||||
{
|
{
|
||||||
case HASH_JENKINS:
|
case CMPH_HASH_JENKINS:
|
||||||
DEBUGP("Jenkins function - %u\n", hashsize);
|
DEBUGP("Jenkins function - %u\n", hashsize);
|
||||||
state = (hash_state_t *)jenkins_state_new(hashsize);
|
state = (cmph_hash_state_t *)cmph_jenkins_state_new(hashsize);
|
||||||
DEBUGP("Jenkins function created\n");
|
DEBUGP("Jenkins function created\n");
|
||||||
break;
|
break;
|
||||||
case HASH_DJB2:
|
case CMPH_HASH_DJB2:
|
||||||
state = (hash_state_t *)djb2_state_new();
|
state = (cmph_hash_state_t *)cmph_djb2_state_new();
|
||||||
break;
|
break;
|
||||||
case HASH_SDBM:
|
case CMPH_HASH_SDBM:
|
||||||
state = (hash_state_t *)sdbm_state_new();
|
state = (cmph_hash_state_t *)cmph_sdbm_state_new();
|
||||||
break;
|
break;
|
||||||
case HASH_FNV:
|
case CMPH_HASH_FNV:
|
||||||
state = (hash_state_t *)fnv_state_new();
|
state = (cmph_hash_state_t *)cmph_fnv_state_new();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -34,18 +34,18 @@ hash_state_t *hash_state_new(CMPH_HASH hashfunc, uint32 hashsize)
|
|||||||
state->hashfunc = hashfunc;
|
state->hashfunc = hashfunc;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
uint32 hash(hash_state_t *state, const char *key, uint32 keylen)
|
cmph_uint32 cmph_hash(cmph_hash_state_t *state, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
switch (state->hashfunc)
|
switch (state->hashfunc)
|
||||||
{
|
{
|
||||||
case HASH_JENKINS:
|
case CMPH_HASH_JENKINS:
|
||||||
return jenkins_hash((jenkins_state_t *)state, key, keylen);
|
return cmph_jenkins_hash((cmph_jenkins_state_t *)state, key, keylen);
|
||||||
case HASH_DJB2:
|
case CMPH_HASH_DJB2:
|
||||||
return djb2_hash((djb2_state_t *)state, key, keylen);
|
return cmph_djb2_hash((cmph_djb2_state_t *)state, key, keylen);
|
||||||
case HASH_SDBM:
|
case CMPH_HASH_SDBM:
|
||||||
return sdbm_hash((sdbm_state_t *)state, key, keylen);
|
return cmph_sdbm_hash((cmph_sdbm_state_t *)state, key, keylen);
|
||||||
case HASH_FNV:
|
case CMPH_HASH_FNV:
|
||||||
return fnv_hash((fnv_state_t *)state, key, keylen);
|
return cmph_fnv_hash((cmph_fnv_state_t *)state, key, keylen);
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
@ -53,84 +53,84 @@ uint32 hash(hash_state_t *state, const char *key, uint32 keylen)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void hash_state_dump(hash_state_t *state, char **buf, uint32 *buflen)
|
void cmph_hash_state_dump(cmph_hash_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
char *algobuf;
|
char *algobuf;
|
||||||
switch (state->hashfunc)
|
switch (state->hashfunc)
|
||||||
{
|
{
|
||||||
case HASH_JENKINS:
|
case CMPH_HASH_JENKINS:
|
||||||
jenkins_state_dump((jenkins_state_t *)state, &algobuf, buflen);
|
cmph_jenkins_state_dump((cmph_jenkins_state_t *)state, &algobuf, buflen);
|
||||||
if (*buflen == UINT_MAX) return;
|
if (*buflen == UINT_MAX) return;
|
||||||
break;
|
break;
|
||||||
case HASH_DJB2:
|
case CMPH_HASH_DJB2:
|
||||||
djb2_state_dump((djb2_state_t *)state, &algobuf, buflen);
|
cmph_djb2_state_dump((cmph_djb2_state_t *)state, &algobuf, buflen);
|
||||||
if (*buflen == UINT_MAX) return;
|
if (*buflen == UINT_MAX) return;
|
||||||
break;
|
break;
|
||||||
case HASH_SDBM:
|
case CMPH_HASH_SDBM:
|
||||||
sdbm_state_dump((sdbm_state_t *)state, &algobuf, buflen);
|
cmph_sdbm_state_dump((cmph_sdbm_state_t *)state, &algobuf, buflen);
|
||||||
if (*buflen == UINT_MAX) return;
|
if (*buflen == UINT_MAX) return;
|
||||||
break;
|
break;
|
||||||
case HASH_FNV:
|
case CMPH_HASH_FNV:
|
||||||
fnv_state_dump((fnv_state_t *)state, &algobuf, buflen);
|
cmph_fnv_state_dump((cmph_fnv_state_t *)state, &algobuf, buflen);
|
||||||
if (*buflen == UINT_MAX) return;
|
if (*buflen == UINT_MAX) return;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
*buf = malloc(strlen(hash_names[state->hashfunc]) + 1 + *buflen);
|
*buf = malloc(strlen(cmph_hash_names[state->hashfunc]) + 1 + *buflen);
|
||||||
memcpy(*buf, hash_names[state->hashfunc], strlen(hash_names[state->hashfunc]) + 1);
|
memcpy(*buf, cmph_hash_names[state->hashfunc], strlen(cmph_hash_names[state->hashfunc]) + 1);
|
||||||
DEBUGP("Algobuf is %u\n", *(uint32 *)algobuf);
|
DEBUGP("Algobuf is %u\n", *(cmph_uint32 *)algobuf);
|
||||||
memcpy(*buf + strlen(hash_names[state->hashfunc]) + 1, algobuf, *buflen);
|
memcpy(*buf + strlen(cmph_hash_names[state->hashfunc]) + 1, algobuf, *buflen);
|
||||||
*buflen = (uint32)strlen(hash_names[state->hashfunc]) + 1 + *buflen;
|
*buflen = (cmph_uint32)strlen(cmph_hash_names[state->hashfunc]) + 1 + *buflen;
|
||||||
free(algobuf);
|
free(algobuf);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
hash_state_t *hash_state_load(const char *buf, uint32 buflen)
|
cmph_hash_state_t *cmph_hash_state_load(const char *buf, cmph_uint32 buflen)
|
||||||
{
|
{
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
uint32 offset;
|
cmph_uint32 offset;
|
||||||
CMPH_HASH hashfunc = HASH_COUNT;
|
CMPH_HASH hashfunc = CMPH_HASH_COUNT;
|
||||||
for (i = 0; i < HASH_COUNT; ++i)
|
for (i = 0; i < CMPH_HASH_COUNT; ++i)
|
||||||
{
|
{
|
||||||
if (strcmp(buf, hash_names[i]) == 0)
|
if (strcmp(buf, cmph_hash_names[i]) == 0)
|
||||||
{
|
{
|
||||||
hashfunc = i;
|
hashfunc = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (hashfunc == HASH_COUNT) return NULL;
|
if (hashfunc == CMPH_HASH_COUNT) return NULL;
|
||||||
offset = (uint32)strlen(hash_names[hashfunc]) + 1;
|
offset = (cmph_uint32)strlen(cmph_hash_names[hashfunc]) + 1;
|
||||||
switch (hashfunc)
|
switch (hashfunc)
|
||||||
{
|
{
|
||||||
case HASH_JENKINS:
|
case CMPH_HASH_JENKINS:
|
||||||
return (hash_state_t *)jenkins_state_load(buf + offset, buflen - offset);
|
return (cmph_hash_state_t *)cmph_jenkins_state_load(buf + offset, buflen - offset);
|
||||||
case HASH_DJB2:
|
case CMPH_HASH_DJB2:
|
||||||
return (hash_state_t *)djb2_state_load(buf + offset, buflen - offset);
|
return (cmph_hash_state_t *)cmph_djb2_state_load(buf + offset, buflen - offset);
|
||||||
case HASH_SDBM:
|
case CMPH_HASH_SDBM:
|
||||||
return (hash_state_t *)sdbm_state_load(buf + offset, buflen - offset);
|
return (cmph_hash_state_t *)cmph_sdbm_state_load(buf + offset, buflen - offset);
|
||||||
case HASH_FNV:
|
case CMPH_HASH_FNV:
|
||||||
return (hash_state_t *)fnv_state_load(buf + offset, buflen - offset);
|
return (cmph_hash_state_t *)cmph_fnv_state_load(buf + offset, buflen - offset);
|
||||||
default:
|
default:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
void hash_state_destroy(hash_state_t *state)
|
void cmph_hash_state_destroy(cmph_hash_state_t *state)
|
||||||
{
|
{
|
||||||
switch (state->hashfunc)
|
switch (state->hashfunc)
|
||||||
{
|
{
|
||||||
case HASH_JENKINS:
|
case CMPH_HASH_JENKINS:
|
||||||
jenkins_state_destroy((jenkins_state_t *)state);
|
cmph_jenkins_state_destroy((cmph_jenkins_state_t *)state);
|
||||||
break;
|
break;
|
||||||
case HASH_DJB2:
|
case CMPH_HASH_DJB2:
|
||||||
djb2_state_destroy((djb2_state_t *)state);
|
cmph_djb2_state_destroy((cmph_djb2_state_t *)state);
|
||||||
break;
|
break;
|
||||||
case HASH_SDBM:
|
case CMPH_HASH_SDBM:
|
||||||
sdbm_state_destroy((sdbm_state_t *)state);
|
cmph_sdbm_state_destroy((cmph_sdbm_state_t *)state);
|
||||||
break;
|
break;
|
||||||
case HASH_FNV:
|
case CMPH_HASH_FNV:
|
||||||
fnv_state_destroy((fnv_state_t *)state);
|
cmph_fnv_state_destroy((cmph_fnv_state_t *)state);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
|
12
src/hash.h
12
src/hash.h
@ -3,12 +3,12 @@
|
|||||||
|
|
||||||
#include "cmph_types.h"
|
#include "cmph_types.h"
|
||||||
|
|
||||||
typedef union __hash_state_t hash_state_t;
|
typedef union cmph__hash_state_t cmph_hash_state_t;
|
||||||
|
|
||||||
hash_state_t *hash_state_new(CMPH_HASH, uint32 hashsize);
|
cmph_hash_state_t *cmph_hash_state_new(CMPH_HASH, cmph_uint32 hashsize);
|
||||||
uint32 hash(hash_state_t *state, const char *key, uint32 keylen);
|
cmph_uint32 cmph_hash(cmph_hash_state_t *state, const char *key, cmph_uint32 keylen);
|
||||||
void hash_state_dump(hash_state_t *state, char **buf, uint32 *buflen);
|
void cmph_hash_state_dump(cmph_hash_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
hash_state_t *hash_state_load(const char *buf, uint32 buflen);
|
cmph_hash_state_t *cmph_hash_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
void hash_state_destroy(hash_state_t *state);
|
void cmph_hash_state_destroy(cmph_hash_state_t *state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -6,13 +6,13 @@
|
|||||||
#include "djb2_hash.h"
|
#include "djb2_hash.h"
|
||||||
#include "sdbm_hash.h"
|
#include "sdbm_hash.h"
|
||||||
#include "fnv_hash.h"
|
#include "fnv_hash.h"
|
||||||
union __hash_state_t
|
union cmph__hash_state_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfunc;
|
CMPH_HASH hashfunc;
|
||||||
jenkins_state_t jenkins;
|
cmph_jenkins_state_t jenkins;
|
||||||
djb2_state_t djb2;
|
cmph_djb2_state_t djb2;
|
||||||
sdbm_state_t sdbm;
|
cmph_sdbm_state_t sdbm;
|
||||||
fnv_state_t fnv;
|
cmph_fnv_state_t fnv;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
#define hashsize(n) ((uint32)1<<(n))
|
#define hashsize(n) ((cmph_uint32)1<<(n))
|
||||||
#define hashmask(n) (hashsize(n)-1)
|
#define hashmask(n) (hashsize(n)-1)
|
||||||
|
|
||||||
|
|
||||||
@ -73,7 +73,7 @@ use a bitmask. For example, if you need only 10 bits, do
|
|||||||
h = (h & hashmask(10));
|
h = (h & hashmask(10));
|
||||||
In which case, the hash table should have hashsize(10) elements.
|
In which case, the hash table should have hashsize(10) elements.
|
||||||
|
|
||||||
If you are hashing n strings (uint8 **)k, do it like this:
|
If you are hashing n strings (cmph_uint8 **)k, do it like this:
|
||||||
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
|
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
|
||||||
|
|
||||||
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
|
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
|
||||||
@ -84,25 +84,25 @@ Use for hash table lookup, or anything where one collision in 2^^32 is
|
|||||||
acceptable. Do NOT use for cryptographic purposes.
|
acceptable. Do NOT use for cryptographic purposes.
|
||||||
--------------------------------------------------------------------
|
--------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
jenkins_state_t *jenkins_state_new(uint32 size) //size of hash table
|
cmph_jenkins_state_t *cmph_jenkins_state_new(cmph_uint32 size) //size of hash table
|
||||||
{
|
{
|
||||||
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
|
cmph_jenkins_state_t *state = (cmph_jenkins_state_t *)malloc(sizeof(cmph_jenkins_state_t));
|
||||||
DEBUGP("Initializing jenkins hash\n");
|
DEBUGP("Initializing jenkins hash\n");
|
||||||
state->seed = rand() % size;
|
state->seed = rand() % size;
|
||||||
state->nbits = (uint32)ceil(log(size)/M_LOG2E);
|
state->nbits = (cmph_uint32)ceil(log(size)/M_LOG2E);
|
||||||
state->size = size;
|
state->size = size;
|
||||||
DEBUGP("Initialized jenkins with size %u, nbits %u and seed %u\n", size, state->nbits, state->seed);
|
DEBUGP("Initialized jenkins with size %u, nbits %u and seed %u\n", size, state->nbits, state->seed);
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
void jenkins_state_destroy(jenkins_state_t *state)
|
void cmph_jenkins_state_destroy(cmph_jenkins_state_t *state)
|
||||||
{
|
{
|
||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 jenkins_hash(jenkins_state_t *state, const char *k, uint32 keylen)
|
cmph_uint32 cmph_jenkins_hash(cmph_jenkins_state_t *state, const char *k, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
uint32 a, b, c;
|
cmph_uint32 a, b, c;
|
||||||
uint32 len, length;
|
cmph_uint32 len, length;
|
||||||
|
|
||||||
/* Set up the internal state */
|
/* Set up the internal state */
|
||||||
length = keylen;
|
length = keylen;
|
||||||
@ -113,9 +113,9 @@ uint32 jenkins_hash(jenkins_state_t *state, const char *k, uint32 keylen)
|
|||||||
/*---------------------------------------- handle most of the key */
|
/*---------------------------------------- handle most of the key */
|
||||||
while (len >= 12)
|
while (len >= 12)
|
||||||
{
|
{
|
||||||
a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
|
a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
|
||||||
b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
|
b += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
|
||||||
c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
|
c += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
|
||||||
mix(a,b,c);
|
mix(a,b,c);
|
||||||
k += 12; len -= 12;
|
k += 12; len -= 12;
|
||||||
}
|
}
|
||||||
@ -125,26 +125,26 @@ uint32 jenkins_hash(jenkins_state_t *state, const char *k, uint32 keylen)
|
|||||||
switch(len) /* all the case statements fall through */
|
switch(len) /* all the case statements fall through */
|
||||||
{
|
{
|
||||||
case 11:
|
case 11:
|
||||||
c +=((uint32)k[10]<<24);
|
c +=((cmph_uint32)k[10]<<24);
|
||||||
case 10:
|
case 10:
|
||||||
c +=((uint32)k[9]<<16);
|
c +=((cmph_uint32)k[9]<<16);
|
||||||
case 9 :
|
case 9 :
|
||||||
c +=((uint32)k[8]<<8);
|
c +=((cmph_uint32)k[8]<<8);
|
||||||
/* the first byte of c is reserved for the length */
|
/* the first byte of c is reserved for the length */
|
||||||
case 8 :
|
case 8 :
|
||||||
b +=((uint32)k[7]<<24);
|
b +=((cmph_uint32)k[7]<<24);
|
||||||
case 7 :
|
case 7 :
|
||||||
b +=((uint32)k[6]<<16);
|
b +=((cmph_uint32)k[6]<<16);
|
||||||
case 6 :
|
case 6 :
|
||||||
b +=((uint32)k[5]<<8);
|
b +=((cmph_uint32)k[5]<<8);
|
||||||
case 5 :
|
case 5 :
|
||||||
b +=k[4];
|
b +=k[4];
|
||||||
case 4 :
|
case 4 :
|
||||||
a +=((uint32)k[3]<<24);
|
a +=((cmph_uint32)k[3]<<24);
|
||||||
case 3 :
|
case 3 :
|
||||||
a +=((uint32)k[2]<<16);
|
a +=((cmph_uint32)k[2]<<16);
|
||||||
case 2 :
|
case 2 :
|
||||||
a +=((uint32)k[1]<<8);
|
a +=((cmph_uint32)k[1]<<8);
|
||||||
case 1 :
|
case 1 :
|
||||||
a +=k[0];
|
a +=k[0];
|
||||||
/* case 0: nothing left to add */
|
/* case 0: nothing left to add */
|
||||||
@ -162,29 +162,29 @@ uint32 jenkins_hash(jenkins_state_t *state, const char *k, uint32 keylen)
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
void jenkins_state_dump(jenkins_state_t *state, char **buf, uint32 *buflen)
|
void cmph_jenkins_state_dump(cmph_jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
*buflen = sizeof(uint32)*3;
|
*buflen = sizeof(cmph_uint32)*3;
|
||||||
*buf = malloc(*buflen);
|
*buf = malloc(*buflen);
|
||||||
if (!*buf)
|
if (!*buf)
|
||||||
{
|
{
|
||||||
*buflen = UINT_MAX;
|
*buflen = UINT_MAX;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
memcpy(*buf, &(state->seed), sizeof(uint32));
|
memcpy(*buf, &(state->seed), sizeof(cmph_uint32));
|
||||||
memcpy(*buf + sizeof(uint32), &(state->nbits), sizeof(uint32));
|
memcpy(*buf + sizeof(cmph_uint32), &(state->nbits), sizeof(cmph_uint32));
|
||||||
memcpy(*buf + sizeof(uint32)*2, &(state->size), sizeof(uint32));
|
memcpy(*buf + sizeof(cmph_uint32)*2, &(state->size), sizeof(cmph_uint32));
|
||||||
DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
|
DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
jenkins_state_t *jenkins_state_load(const char *buf, uint32 buflen)
|
cmph_jenkins_state_t *cmph_jenkins_state_load(const char *buf, cmph_uint32 buflen)
|
||||||
{
|
{
|
||||||
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
|
cmph_jenkins_state_t *state = (cmph_jenkins_state_t *)malloc(sizeof(cmph_jenkins_state_t));
|
||||||
state->seed = *(uint32 *)buf;
|
state->seed = *(cmph_uint32 *)buf;
|
||||||
state->nbits = *(((uint32 *)buf) + 1);
|
state->nbits = *(((cmph_uint32 *)buf) + 1);
|
||||||
state->size = *(((uint32 *)buf) + 2);
|
state->size = *(((cmph_uint32 *)buf) + 2);
|
||||||
state->hashfunc = HASH_JENKINS;
|
state->hashfunc = CMPH_HASH_JENKINS;
|
||||||
DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
|
DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
@ -3,18 +3,18 @@
|
|||||||
|
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
typedef struct __jenkins_state_t
|
typedef struct cmph__jenkins_state_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfunc;
|
CMPH_HASH hashfunc;
|
||||||
uint32 seed;
|
cmph_uint32 seed;
|
||||||
uint32 nbits;
|
cmph_uint32 nbits;
|
||||||
uint32 size;
|
cmph_uint32 size;
|
||||||
} jenkins_state_t;
|
} cmph_jenkins_state_t;
|
||||||
|
|
||||||
jenkins_state_t *jenkins_state_new(uint32 size); //size of hash table
|
cmph_jenkins_state_t *cmph_jenkins_state_new(cmph_uint32 size); //size of hash table
|
||||||
uint32 jenkins_hash(jenkins_state_t *state, const char *k, uint32 keylen);
|
cmph_uint32 cmph_jenkins_hash(cmph_jenkins_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
void jenkins_state_dump(jenkins_state_t *state, char **buf, uint32 *buflen);
|
void cmph_jenkins_state_dump(cmph_jenkins_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
jenkins_state_t *jenkins_state_load(const char *buf, uint32 buflen);
|
cmph_jenkins_state_t *cmph_jenkins_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
void jenkins_state_destroy(jenkins_state_t *state);
|
void cmph_jenkins_state_destroy(cmph_jenkins_state_t *state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
78
src/main.c
78
src/main.c
@ -23,15 +23,15 @@ void usage(const char *prg)
|
|||||||
}
|
}
|
||||||
void usage_long(const char *prg)
|
void usage_long(const char *prg)
|
||||||
{
|
{
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg);
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg);
|
||||||
fprintf(stderr, "Minimum perfect hashing tool\n\n");
|
fprintf(stderr, "Minimum perfect hashing tool\n\n");
|
||||||
fprintf(stderr, " -h\t print this help message\n");
|
fprintf(stderr, " -h\t print this help message\n");
|
||||||
fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n");
|
fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n");
|
||||||
fprintf(stderr, " -a\t algorithm - valid values are\n");
|
fprintf(stderr, " -a\t algorithm - valid values are\n");
|
||||||
for (i = 0; i < MPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", mph_names[i]);
|
for (i = 0; i < CMPH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_names[i]);
|
||||||
fprintf(stderr, " -f\t hash function (may be used multiple times) - valid values are\n");
|
fprintf(stderr, " -f\t hash function (may be used multiple times) - valid values are\n");
|
||||||
for (i = 0; i < HASH_COUNT; ++i) fprintf(stderr, " \t * %s\n", hash_names[i]);
|
for (i = 0; i < CMPH_HASH_COUNT; ++i) fprintf(stderr, " \t * %s\n", cmph_hash_names[i]);
|
||||||
fprintf(stderr, " -V\t print version number and exit\n");
|
fprintf(stderr, " -V\t print version number and exit\n");
|
||||||
fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
|
fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
|
||||||
fprintf(stderr, " -k\t number of keys\n");
|
fprintf(stderr, " -k\t number of keys\n");
|
||||||
@ -41,7 +41,7 @@ void usage_long(const char *prg)
|
|||||||
fprintf(stderr, " keysfile\t line separated file with keys\n");
|
fprintf(stderr, " keysfile\t line separated file with keys\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static int key_read(void *data, char **key, uint32 *keylen)
|
static int key_read(void *data, char **key, cmph_uint32 *keylen)
|
||||||
{
|
{
|
||||||
FILE *fd = (FILE *)data;
|
FILE *fd = (FILE *)data;
|
||||||
*key = NULL;
|
*key = NULL;
|
||||||
@ -54,7 +54,7 @@ static int key_read(void *data, char **key, uint32 *keylen)
|
|||||||
if (feof(fd)) return -1;
|
if (feof(fd)) return -1;
|
||||||
*key = (char *)realloc(*key, *keylen + strlen(buf) + 1);
|
*key = (char *)realloc(*key, *keylen + strlen(buf) + 1);
|
||||||
memcpy(*key + *keylen, buf, strlen(buf));
|
memcpy(*key + *keylen, buf, strlen(buf));
|
||||||
*keylen += (uint32)strlen(buf);
|
*keylen += (cmph_uint32)strlen(buf);
|
||||||
if (buf[strlen(buf) - 1] != '\n') continue;
|
if (buf[strlen(buf) - 1] != '\n') continue;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -66,7 +66,7 @@ static int key_read(void *data, char **key, uint32 *keylen)
|
|||||||
return *keylen;
|
return *keylen;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void key_dispose(void *data, char *key, uint32 keylen)
|
static void key_dispose(void *data, char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
free(key);
|
free(key);
|
||||||
}
|
}
|
||||||
@ -76,9 +76,9 @@ static void key_rewind(void *data)
|
|||||||
rewind(fd);
|
rewind(fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32 count_keys(FILE *fd)
|
static cmph_uint32 count_keys(FILE *fd)
|
||||||
{
|
{
|
||||||
uint32 count = 0;
|
cmph_uint32 count = 0;
|
||||||
rewind(fd);
|
rewind(fd);
|
||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
@ -100,17 +100,17 @@ int main(int argc, char **argv)
|
|||||||
FILE *mphf_fd = stdout;
|
FILE *mphf_fd = stdout;
|
||||||
const char *keys_file = NULL;
|
const char *keys_file = NULL;
|
||||||
FILE *keys_fd;
|
FILE *keys_fd;
|
||||||
uint32 nkeys = UINT_MAX;
|
cmph_uint32 nkeys = UINT_MAX;
|
||||||
uint32 seed = UINT_MAX;
|
cmph_uint32 seed = UINT_MAX;
|
||||||
CMPH_HASH *hashes = NULL;
|
CMPH_HASH *hashes = NULL;
|
||||||
uint32 nhashes = 0;
|
cmph_uint32 nhashes = 0;
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
MPH_ALGO mph_algo = MPH_CZECH;
|
CMPH_ALGO mph_algo = CMPH_CZECH;
|
||||||
float c = 2.09;
|
float c = 2.09;
|
||||||
mph_t *mph = NULL;
|
cmph_mph_t *mph = NULL;
|
||||||
mphf_t *mphf = NULL;
|
cmph_mphf_t *mphf = NULL;
|
||||||
|
|
||||||
key_source_t source;
|
cmph_key_source_t source;
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
@ -166,9 +166,9 @@ int main(int argc, char **argv)
|
|||||||
case 'a':
|
case 'a':
|
||||||
{
|
{
|
||||||
char valid = 0;
|
char valid = 0;
|
||||||
for (i = 0; i < MPH_COUNT; ++i)
|
for (i = 0; i < CMPH_COUNT; ++i)
|
||||||
{
|
{
|
||||||
if (strcmp(mph_names[i], optarg) == 0)
|
if (strcmp(cmph_names[i], optarg) == 0)
|
||||||
{
|
{
|
||||||
mph_algo = i;
|
mph_algo = i;
|
||||||
valid = 1;
|
valid = 1;
|
||||||
@ -185,13 +185,13 @@ int main(int argc, char **argv)
|
|||||||
case 'f':
|
case 'f':
|
||||||
{
|
{
|
||||||
char valid = 0;
|
char valid = 0;
|
||||||
for (i = 0; i < HASH_COUNT; ++i)
|
for (i = 0; i < CMPH_HASH_COUNT; ++i)
|
||||||
{
|
{
|
||||||
if (strcmp(hash_names[i], optarg) == 0)
|
if (strcmp(cmph_hash_names[i], optarg) == 0)
|
||||||
{
|
{
|
||||||
hashes = (CMPH_HASH *)realloc(hashes, sizeof(CMPH_HASH) * ( nhashes + 2 ));
|
hashes = (CMPH_HASH *)realloc(hashes, sizeof(CMPH_HASH) * ( nhashes + 2 ));
|
||||||
hashes[nhashes] = i;
|
hashes[nhashes] = i;
|
||||||
hashes[nhashes + 1] = HASH_COUNT;
|
hashes[nhashes + 1] = CMPH_HASH_COUNT;
|
||||||
++nhashes;
|
++nhashes;
|
||||||
valid = 1;
|
valid = 1;
|
||||||
break;
|
break;
|
||||||
@ -216,7 +216,7 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
keys_file = argv[optind];
|
keys_file = argv[optind];
|
||||||
if (seed == UINT_MAX) seed = (uint32)time(NULL);
|
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
||||||
srand(seed);
|
srand(seed);
|
||||||
|
|
||||||
if (mphf_file == NULL)
|
if (mphf_file == NULL)
|
||||||
@ -234,7 +234,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
source.data = (void *)keys_fd;
|
source.data = (void *)keys_fd;
|
||||||
if (seed == UINT_MAX) seed = (uint32)time(NULL);
|
if (seed == UINT_MAX) seed = (cmph_uint32)time(NULL);
|
||||||
if(nkeys == UINT_MAX) source.nkeys = count_keys(keys_fd);
|
if(nkeys == UINT_MAX) source.nkeys = count_keys(keys_fd);
|
||||||
else source.nkeys = nkeys;
|
else source.nkeys = nkeys;
|
||||||
source.read = key_read;
|
source.read = key_read;
|
||||||
@ -245,17 +245,17 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
//Create mphf
|
//Create mphf
|
||||||
|
|
||||||
mph = mph_new(mph_algo, &source);
|
mph = cmph_mph_new(mph_algo, &source);
|
||||||
if (nhashes) mph_set_hashfuncs(mph, hashes);
|
if (nhashes) cmph_mph_set_hashfuncs(mph, hashes);
|
||||||
mph_set_verbosity(mph, verbosity);
|
cmph_mph_set_verbosity(mph, verbosity);
|
||||||
if(mph_algo == MPH_BMZ && c >= 2.0) c=1.15;
|
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
|
||||||
if (c != 0) mph_set_graphsize(mph, c);
|
if (c != 0) cmph_mph_set_graphsize(mph, c);
|
||||||
mphf = mph_create(mph);
|
mphf = cmph_mph_create(mph);
|
||||||
|
|
||||||
if (mphf == NULL)
|
if (mphf == NULL)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Unable to create minimum perfect hashing function\n");
|
fprintf(stderr, "Unable to create minimum perfect hashing function\n");
|
||||||
mph_destroy(mph);
|
cmph_mph_destroy(mph);
|
||||||
free(mphf_file);
|
free(mphf_file);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -267,13 +267,13 @@ int main(int argc, char **argv)
|
|||||||
free(mphf_file);
|
free(mphf_file);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
mphf_dump(mphf, mphf_fd);
|
cmph_mphf_dump(mphf, mphf_fd);
|
||||||
mphf_destroy(mphf);
|
cmph_mphf_destroy(mphf);
|
||||||
fclose(mphf_fd);
|
fclose(mphf_fd);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint8 * hashtable = NULL;
|
cmph_uint8 * hashtable = NULL;
|
||||||
mphf_fd = fopen(mphf_file, "r");
|
mphf_fd = fopen(mphf_file, "r");
|
||||||
if (mphf_fd == NULL)
|
if (mphf_fd == NULL)
|
||||||
{
|
{
|
||||||
@ -281,7 +281,7 @@ int main(int argc, char **argv)
|
|||||||
free(mphf_file);
|
free(mphf_file);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
mphf = mphf_load(mphf_fd);
|
mphf = cmph_mphf_load(mphf_fd);
|
||||||
fclose(mphf_fd);
|
fclose(mphf_fd);
|
||||||
if (!mphf)
|
if (!mphf)
|
||||||
{
|
{
|
||||||
@ -289,16 +289,16 @@ int main(int argc, char **argv)
|
|||||||
free(mphf_file);
|
free(mphf_file);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
hashtable = (uint8*)malloc(source.nkeys*sizeof(uint8));
|
hashtable = (cmph_uint8*)malloc(source.nkeys*sizeof(cmph_uint8));
|
||||||
memset(hashtable, 0, source.nkeys);
|
memset(hashtable, 0, source.nkeys);
|
||||||
//check all keys
|
//check all keys
|
||||||
for (i = 0; i < source.nkeys; ++i)
|
for (i = 0; i < source.nkeys; ++i)
|
||||||
{
|
{
|
||||||
uint32 h;
|
cmph_uint32 h;
|
||||||
char *buf;
|
char *buf;
|
||||||
uint32 buflen = 0;
|
cmph_uint32 buflen = 0;
|
||||||
source.read(source.data, &buf, &buflen);
|
source.read(source.data, &buf, &buflen);
|
||||||
h = mphf_search(mphf, buf, buflen);
|
h = cmph_mphf_search(mphf, buf, buflen);
|
||||||
if(hashtable[h])fprintf(stderr, "collision: %u\n",h);
|
if(hashtable[h])fprintf(stderr, "collision: %u\n",h);
|
||||||
assert(hashtable[h]==0);
|
assert(hashtable[h]==0);
|
||||||
hashtable[h] = 1;
|
hashtable[h] = 1;
|
||||||
@ -308,7 +308,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
source.dispose(source.data, buf, buflen);
|
source.dispose(source.data, buf, buflen);
|
||||||
}
|
}
|
||||||
mphf_destroy(mphf);
|
cmph_mphf_destroy(mphf);
|
||||||
free(hashtable);
|
free(hashtable);
|
||||||
}
|
}
|
||||||
fclose(keys_fd);
|
fclose(keys_fd);
|
||||||
|
@ -1,23 +1,23 @@
|
|||||||
#include "sdbm_hash.h"
|
#include "sdbm_hash.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
sdbm_state_t *sdbm_state_new()
|
cmph_sdbm_state_t *cmph_sdbm_state_new()
|
||||||
{
|
{
|
||||||
sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
|
cmph_sdbm_state_t *state = (cmph_sdbm_state_t *)malloc(sizeof(cmph_sdbm_state_t));
|
||||||
state->hashfunc = HASH_SDBM;
|
state->hashfunc = CMPH_HASH_SDBM;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sdbm_state_destroy(sdbm_state_t *state)
|
void cmph_sdbm_state_destroy(cmph_sdbm_state_t *state)
|
||||||
{
|
{
|
||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 sdbm_hash(sdbm_state_t *state, const char *k, uint32 keylen)
|
cmph_uint32 cmph_sdbm_hash(cmph_sdbm_state_t *state, const char *k, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
register uint32 hash = 0;
|
register cmph_uint32 hash = 0;
|
||||||
const unsigned char *ptr = k;
|
const unsigned char *ptr = k;
|
||||||
uint32 i = 0;
|
cmph_uint32 i = 0;
|
||||||
|
|
||||||
while(i < keylen) {
|
while(i < keylen) {
|
||||||
hash = *ptr + (hash << 6) + (hash << 16) - hash;
|
hash = *ptr + (hash << 6) + (hash << 16) - hash;
|
||||||
@ -27,16 +27,16 @@ uint32 sdbm_hash(sdbm_state_t *state, const char *k, uint32 keylen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void sdbm_state_dump(sdbm_state_t *state, char **buf, uint32 *buflen)
|
void cmph_sdbm_state_dump(cmph_sdbm_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
*buf = NULL;
|
*buf = NULL;
|
||||||
*buflen = 0;
|
*buflen = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
sdbm_state_t *sdbm_state_load(const char *buf, uint32 buflen)
|
cmph_sdbm_state_t *cmph_sdbm_state_load(const char *buf, cmph_uint32 buflen)
|
||||||
{
|
{
|
||||||
sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
|
cmph_sdbm_state_t *state = (cmph_sdbm_state_t *)malloc(sizeof(cmph_sdbm_state_t));
|
||||||
state->hashfunc = HASH_SDBM;
|
state->hashfunc = CMPH_HASH_SDBM;
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
@ -3,15 +3,15 @@
|
|||||||
|
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
typedef struct __sdbm_state_t
|
typedef struct cmph__sdbm_state_t
|
||||||
{
|
{
|
||||||
CMPH_HASH hashfunc;
|
CMPH_HASH hashfunc;
|
||||||
} sdbm_state_t;
|
} cmph_sdbm_state_t;
|
||||||
|
|
||||||
sdbm_state_t *sdbm_state_new();
|
cmph_sdbm_state_t *cmph_sdbm_state_new();
|
||||||
uint32 sdbm_hash(sdbm_state_t *state, const char *k, uint32 keylen);
|
cmph_uint32 cmph_sdbm_hash(cmph_sdbm_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
void sdbm_state_dump(sdbm_state_t *state, char **buf, uint32 *buflen);
|
void cmph_sdbm_state_dump(cmph_sdbm_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
sdbm_state_t *sdbm_state_load(const char *buf, uint32 buflen);
|
cmph_sdbm_state_t *cmph_sdbm_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
void sdbm_state_destroy(sdbm_state_t *state);
|
void cmph_sdbm_state_destroy(cmph_sdbm_state_t *state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
26
src/vqueue.c
26
src/vqueue.c
@ -2,49 +2,49 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
struct __vqueue_t
|
struct cmph__vqueue_t
|
||||||
{
|
{
|
||||||
uint32 * values;
|
cmph_uint32 * values;
|
||||||
uint32 beg, end, capacity;
|
cmph_uint32 beg, end, capacity;
|
||||||
};
|
};
|
||||||
|
|
||||||
vqueue_t * vqueue_new(uint32 capacity)
|
cmph_vqueue_t * cmph_vqueue_new(cmph_uint32 capacity)
|
||||||
{
|
{
|
||||||
vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
|
cmph_vqueue_t *q = (cmph_vqueue_t *)malloc(sizeof(cmph_vqueue_t));
|
||||||
assert(q);
|
assert(q);
|
||||||
q->values = (uint32 *)calloc(capacity+1, sizeof(uint32));
|
q->values = (cmph_uint32 *)calloc(capacity+1, sizeof(cmph_uint32));
|
||||||
q->beg = q->end = 0;
|
q->beg = q->end = 0;
|
||||||
q->capacity = capacity+1;
|
q->capacity = capacity+1;
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8 vqueue_is_empty(vqueue_t * q)
|
cmph_uint8 cmph_vqueue_is_empty(cmph_vqueue_t * q)
|
||||||
{
|
{
|
||||||
return (q->beg == q->end);
|
return (q->beg == q->end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vqueue_insert(vqueue_t * q, uint32 val)
|
void cmph_vqueue_insert(cmph_vqueue_t * q, cmph_uint32 val)
|
||||||
{
|
{
|
||||||
assert((q->end + 1)%q->capacity != q->beg); // Is queue full?
|
assert((q->end + 1)%q->capacity != q->beg); // Is queue full?
|
||||||
q->end = (q->end + 1)%q->capacity;
|
q->end = (q->end + 1)%q->capacity;
|
||||||
q->values[q->end] = val;
|
q->values[q->end] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 vqueue_remove(vqueue_t * q)
|
cmph_uint32 cmph_vqueue_remove(cmph_vqueue_t * q)
|
||||||
{
|
{
|
||||||
assert(!vqueue_is_empty(q)); // Is queue empty?
|
assert(!cmph_vqueue_is_empty(q)); // Is queue empty?
|
||||||
q->beg = (q->beg + 1)%q->capacity;
|
q->beg = (q->beg + 1)%q->capacity;
|
||||||
return q->values[q->beg];
|
return q->values[q->beg];
|
||||||
}
|
}
|
||||||
|
|
||||||
void vqueue_print(vqueue_t * q)
|
void cmph_vqueue_print(cmph_vqueue_t * q)
|
||||||
{
|
{
|
||||||
uint32 i;
|
cmph_uint32 i;
|
||||||
for (i = q->beg; i != q->end; i = (i + 1)%q->capacity)
|
for (i = q->beg; i != q->end; i = (i + 1)%q->capacity)
|
||||||
fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]);
|
fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vqueue_destroy(vqueue_t *q)
|
void cmph_vqueue_destroy(cmph_vqueue_t *q)
|
||||||
{
|
{
|
||||||
free(q->values); q->values = NULL;
|
free(q->values); q->values = NULL;
|
||||||
}
|
}
|
||||||
|
14
src/vqueue.h
14
src/vqueue.h
@ -2,17 +2,17 @@
|
|||||||
#define __CMPH_VQUEUE_H__
|
#define __CMPH_VQUEUE_H__
|
||||||
|
|
||||||
#include "cmph_types.h"
|
#include "cmph_types.h"
|
||||||
typedef struct __vqueue_t vqueue_t;
|
typedef struct cmph__vqueue_t cmph_vqueue_t;
|
||||||
|
|
||||||
vqueue_t * vqueue_new(uint32 capacity);
|
cmph_vqueue_t * cmph_vqueue_new(cmph_uint32 capacity);
|
||||||
|
|
||||||
uint8 vqueue_is_empty(vqueue_t * q);
|
cmph_uint8 cmph_vqueue_is_empty(cmph_vqueue_t * q);
|
||||||
|
|
||||||
void vqueue_insert(vqueue_t * q, uint32 val);
|
void cmph_vqueue_insert(cmph_vqueue_t * q, cmph_uint32 val);
|
||||||
|
|
||||||
uint32 vqueue_remove(vqueue_t * q);
|
cmph_uint32 cmph_vqueue_remove(cmph_vqueue_t * q);
|
||||||
|
|
||||||
void vqueue_print(vqueue_t * q);
|
void cmph_vqueue_print(cmph_vqueue_t * q);
|
||||||
|
|
||||||
void vqueue_destroy(vqueue_t * q);
|
void cmph_vqueue_destroy(cmph_vqueue_t * q);
|
||||||
#endif
|
#endif
|
||||||
|
32
src/vstack.c
32
src/vstack.c
@ -6,16 +6,16 @@
|
|||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
|
|
||||||
struct __vstack_t
|
struct cmph__vstack_t
|
||||||
{
|
{
|
||||||
uint32 pointer;
|
cmph_uint32 pointer;
|
||||||
uint32 *values;
|
cmph_uint32 *values;
|
||||||
uint32 capacity;
|
cmph_uint32 capacity;
|
||||||
};
|
};
|
||||||
|
|
||||||
vstack_t *vstack_new()
|
cmph_vstack_t *cmph_vstack_new()
|
||||||
{
|
{
|
||||||
vstack_t *stack = (vstack_t *)malloc(sizeof(vstack_t));
|
cmph_vstack_t *stack = (cmph_vstack_t *)malloc(sizeof(cmph_vstack_t));
|
||||||
assert(stack);
|
assert(stack);
|
||||||
stack->pointer = 0;
|
stack->pointer = 0;
|
||||||
stack->values = NULL;
|
stack->values = NULL;
|
||||||
@ -23,54 +23,54 @@ vstack_t *vstack_new()
|
|||||||
return stack;
|
return stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
void vstack_destroy(vstack_t *stack)
|
void cmph_vstack_destroy(cmph_vstack_t *stack)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
free(stack->values);
|
free(stack->values);
|
||||||
free(stack);
|
free(stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vstack_push(vstack_t *stack, uint32 val)
|
void cmph_vstack_push(cmph_vstack_t *stack, cmph_uint32 val)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
vstack_reserve(stack, stack->pointer + 1);
|
cmph_vstack_reserve(stack, stack->pointer + 1);
|
||||||
stack->values[stack->pointer] = val;
|
stack->values[stack->pointer] = val;
|
||||||
++(stack->pointer);
|
++(stack->pointer);
|
||||||
}
|
}
|
||||||
void vstack_pop(vstack_t *stack)
|
void cmph_vstack_pop(cmph_vstack_t *stack)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
assert(stack->pointer > 0);
|
assert(stack->pointer > 0);
|
||||||
--(stack->pointer);
|
--(stack->pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 vstack_top(vstack_t *stack)
|
cmph_uint32 cmph_vstack_top(cmph_vstack_t *stack)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
assert(stack->pointer > 0);
|
assert(stack->pointer > 0);
|
||||||
return stack->values[(stack->pointer - 1)];
|
return stack->values[(stack->pointer - 1)];
|
||||||
}
|
}
|
||||||
int vstack_empty(vstack_t *stack)
|
int cmph_vstack_empty(cmph_vstack_t *stack)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
return stack->pointer == 0;
|
return stack->pointer == 0;
|
||||||
}
|
}
|
||||||
uint32 vstack_size(vstack_t *stack)
|
cmph_uint32 cmph_vstack_size(cmph_vstack_t *stack)
|
||||||
{
|
{
|
||||||
return stack->pointer;
|
return stack->pointer;
|
||||||
}
|
}
|
||||||
void vstack_reserve(vstack_t *stack, uint32 size)
|
void cmph_vstack_reserve(cmph_vstack_t *stack, cmph_uint32 size)
|
||||||
{
|
{
|
||||||
assert(stack);
|
assert(stack);
|
||||||
if (stack->capacity < size)
|
if (stack->capacity < size)
|
||||||
{
|
{
|
||||||
uint32 new_capacity = stack->capacity + 1;
|
cmph_uint32 new_capacity = stack->capacity + 1;
|
||||||
DEBUGP("Increasing current capacity %u to %u\n", stack->capacity, size);
|
DEBUGP("Increasing current capacity %u to %u\n", stack->capacity, size);
|
||||||
while (new_capacity < size)
|
while (new_capacity < size)
|
||||||
{
|
{
|
||||||
new_capacity *= 2;
|
new_capacity *= 2;
|
||||||
}
|
}
|
||||||
stack->values = (uint32 *)realloc(stack->values, sizeof(uint32)*new_capacity);
|
stack->values = (cmph_uint32 *)realloc(stack->values, sizeof(cmph_uint32)*new_capacity);
|
||||||
assert(stack->values);
|
assert(stack->values);
|
||||||
stack->capacity = new_capacity;
|
stack->capacity = new_capacity;
|
||||||
DEBUGP("Increased\n");
|
DEBUGP("Increased\n");
|
||||||
|
18
src/vstack.h
18
src/vstack.h
@ -2,17 +2,17 @@
|
|||||||
#define __CMPH_VSTACK_H__
|
#define __CMPH_VSTACK_H__
|
||||||
|
|
||||||
#include "cmph_types.h"
|
#include "cmph_types.h"
|
||||||
typedef struct __vstack_t vstack_t;
|
typedef struct cmph__vstack_t cmph_vstack_t;
|
||||||
|
|
||||||
vstack_t *vstack_new();
|
cmph_vstack_t *cmph_vstack_new();
|
||||||
void vstack_destroy(vstack_t *stack);
|
void cmph_vstack_destroy(cmph_vstack_t *stack);
|
||||||
|
|
||||||
void vstack_push(vstack_t *stack, uint32 val);
|
void cmph_vstack_push(cmph_vstack_t *stack, cmph_uint32 val);
|
||||||
uint32 vstack_top(vstack_t *stack);
|
cmph_uint32 cmph_vstack_top(cmph_vstack_t *stack);
|
||||||
void vstack_pop(vstack_t *stack);
|
void cmph_vstack_pop(cmph_vstack_t *stack);
|
||||||
int vstack_empty(vstack_t *stack);
|
int cmph_vstack_empty(cmph_vstack_t *stack);
|
||||||
uint32 vstack_size(vstack_t *stack);
|
cmph_uint32 cmph_vstack_size(cmph_vstack_t *stack);
|
||||||
|
|
||||||
void vstack_reserve(vstack_t *stack, uint32 size);
|
void cmph_vstack_reserve(cmph_vstack_t *stack, cmph_uint32 size);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user