Fix to alternate hash functions code. Removed htonl stuff from chm algorithm. Added faq.

This commit is contained in:
davi 2005-01-27 13:01:45 +00:00
parent 928f088348
commit 71a55f697e
11 changed files with 93 additions and 50 deletions

27
FAQ.t2t Normal file
View File

@ -0,0 +1,27 @@
CMPH FAQ
- How do I define the ids of the keys?
- You don't. The ids will be assigned by the algorithm creating the minimal
perfect hash function. If the algorithm creates an **ordered** minimal
perfect hash function, the ids will be the indices of the keys in the
input. Otherwise, you have no guarantee of the distribution of the ids.
- Why do I always get the error "Unable to create minimum perfect hashing function"?
- The algorithms do not guarantee that a minimal perfect hash function can
be created. In practice, it will always work if your input
is big enough (>100 keys).
The error is most likely caused by duplicate
keys in the input. You must guarantee that the keys are unique in the
input. If you are using a UN*X based OS, try doing
``` $ sort input.txt | uniq > input_uniq.txt
and then run cmph with input_uniq.txt.
----------------------------------------
[Home index.html]
----------------------------------------
Davi de Castro Reis
Fabiano Cupertino Botelho

View File

@ -159,6 +159,10 @@ utility.
keysfile line separated file with keys keysfile line separated file with keys
``` ```
**Additional Documentation**
[FAQ faq.html]
**Downloads** **Downloads**
Use the project page at sourceforge: http://sf.net/projects/cmph Use the project page at sourceforge: http://sf.net/projects/cmph
@ -171,9 +175,9 @@ Code is under the LGPL.
Enjoy! Enjoy!
Davi de Castro Reis Davi de Castro Reis davi@users.sourceforge.net
Fabiano Cupertino Botelho Fabiano Cupertino Botelho fc_botelho@users.sourceforge.net
%!include(html): ''LOGO.html'' %!include(html): ''LOGO.html''
Last Updated: %%date(%c) Last Updated: %%date(%c)

11
gendocs
View File

@ -1,6 +1,13 @@
txt2tags -t html -i README.t2t -o index.html txt2tags -t html --mask-email -i README.t2t -o index.html
txt2tags -t html -i BMZ.t2t -o bmz.html txt2tags -t html -i BMZ.t2t -o bmz.html
txt2tags -t html -i CHM.t2t -o chm.html txt2tags -t html -i CHM.t2t -o chm.html
txt2tags -t html -i COMPARISON.t2t -o comparison.html txt2tags -t html -i COMPARISON.t2t -o comparison.html
txt2tags -t html -i GPERF.t2t -o gperf.html txt2tags -t html -i GPERF.t2t -o gperf.html
txt2tags -t txt -i README.t2t -o README txt2tags -t html -i FAQ.t2t -o faq.html
txt2tags -t txt --mask-email -i README.t2t -o README
txt2tags -t txt -i BMZ.t2t -o BMZ
txt2tags -t txt -i CHM.t2t -o CHM
txt2tags -t txt -i COMPARISON.t2t -o COMPARISON
txt2tags -t txt -i GPERF.t2t -o GPERF
txt2tags -t txt -i FAQ.t2t -o FAQ

View File

@ -27,7 +27,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited); static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited); static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited);
bmz_config_data_t *bmz_config_new(cmph_io_adapter_t *key_source) bmz_config_data_t *bmz_config_new()
{ {
bmz_config_data_t *bmz = NULL; bmz_config_data_t *bmz = NULL;
bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t)); bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));

View File

@ -6,7 +6,7 @@
typedef struct __bmz_data_t bmz_data_t; typedef struct __bmz_data_t bmz_data_t;
typedef struct __bmz_config_data_t bmz_config_data_t; typedef struct __bmz_config_data_t bmz_config_data_t;
bmz_config_data_t *bmz_config_new(cmph_io_adapter_t *key_source); bmz_config_data_t *bmz_config_new();
void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void bmz_config_destroy(cmph_config_t *mph); void bmz_config_destroy(cmph_config_t *mph);
cmph_t *bmz_new(cmph_config_t *mph, float c); cmph_t *bmz_new(cmph_config_t *mph, float c);

View File

@ -10,20 +10,14 @@
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>
#include <netinet/in.h>
//#define DEBUG //#define DEBUG
#include "debug.h" #include "debug.h"
/* static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */
/* #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) */
/* #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) */
/* #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) */
static int chm_gen_edges(cmph_config_t *mph); static int chm_gen_edges(cmph_config_t *mph);
static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v); static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v);
chm_config_data_t *chm_config_new(cmph_io_adapter_t *key_source) chm_config_data_t *chm_config_new()
{ {
chm_config_data_t *chm = NULL; chm_config_data_t *chm = NULL;
chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t)); chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
@ -173,7 +167,7 @@ static int chm_gen_edges(cmph_config_t *mph)
chm_config_data_t *chm = (chm_config_data_t *)mph->data; chm_config_data_t *chm = (chm_config_data_t *)mph->data;
int cycles = 0; int cycles = 0;
DEBUGP("Generating edges for %u vertices\n", chm->n); DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
graph_clear_edges(chm->graph); graph_clear_edges(chm->graph);
mph->key_source->rewind(mph->key_source->data); mph->key_source->rewind(mph->key_source->data);
for (e = 0; e < mph->key_source->nkeys; ++e) for (e = 0; e < mph->key_source->nkeys; ++e)
@ -206,39 +200,28 @@ int chm_dump(cmph_t *mphf, FILE *fd)
{ {
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint32 nbuflen;
cmph_uint32 i; cmph_uint32 i;
cmph_uint32 two = htonl(2); //number of hash functions cmph_uint32 two = 2; //number of hash functions
chm_data_t *data = (chm_data_t *)mphf->data; chm_data_t *data = (chm_data_t *)mphf->data;
cmph_uint32 nn, nm;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
fwrite(&two, sizeof(cmph_uint32), 1, fd); fwrite(&two, sizeof(cmph_uint32), 1, fd);
hash_state_dump(data->hashes[0], &buf, &buflen); hash_state_dump(data->hashes[0], &buf, &buflen);
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen); DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
nbuflen = htonl(buflen); fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
fwrite(&nbuflen, sizeof(cmph_uint32), 1, fd);
fwrite(buf, buflen, 1, fd); fwrite(buf, buflen, 1, fd);
free(buf); free(buf);
hash_state_dump(data->hashes[1], &buf, &buflen); hash_state_dump(data->hashes[1], &buf, &buflen);
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen); DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
nbuflen = htonl(buflen); fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
fwrite(&nbuflen, sizeof(cmph_uint32), 1, fd);
fwrite(buf, buflen, 1, fd); fwrite(buf, buflen, 1, fd);
free(buf); free(buf);
nn = htonl(data->n); fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
fwrite(&nn, sizeof(cmph_uint32), 1, fd); fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
nm = htonl(data->m);
fwrite(&nm, sizeof(cmph_uint32), 1, fd);
for (i = 0; i < data->n; ++i) fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd);
{
cmph_uint32 ng = htonl(data->g[i]);
fwrite(&ng, sizeof(cmph_uint32), 1, fd);
}
#ifdef DEBUG #ifdef DEBUG
fprintf(stderr, "G: "); fprintf(stderr, "G: ");
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]); for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
@ -260,7 +243,6 @@ void chm_load(FILE *f, cmph_t *mphf)
DEBUGP("Loading chm mphf\n"); DEBUGP("Loading chm mphf\n");
mphf->data = chm; mphf->data = chm;
fread(&nhashes, sizeof(cmph_uint32), 1, f); fread(&nhashes, sizeof(cmph_uint32), 1, f);
nhashes = ntohl(nhashes);
chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1)); chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
chm->hashes[nhashes] = NULL; chm->hashes[nhashes] = NULL;
DEBUGP("Reading %u hashes\n", nhashes); DEBUGP("Reading %u hashes\n", nhashes);
@ -268,7 +250,6 @@ void chm_load(FILE *f, cmph_t *mphf)
{ {
hash_state_t *state = NULL; hash_state_t *state = NULL;
fread(&buflen, sizeof(cmph_uint32), 1, f); fread(&buflen, sizeof(cmph_uint32), 1, f);
buflen = ntohl(buflen);
DEBUGP("Hash state has %u bytes\n", buflen); DEBUGP("Hash state has %u bytes\n", buflen);
buf = (char *)malloc(buflen); buf = (char *)malloc(buflen);
fread(buf, buflen, 1, f); fread(buf, buflen, 1, f);
@ -279,13 +260,10 @@ void chm_load(FILE *f, cmph_t *mphf)
DEBUGP("Reading m and n\n"); DEBUGP("Reading m and n\n");
fread(&(chm->n), sizeof(cmph_uint32), 1, f); fread(&(chm->n), sizeof(cmph_uint32), 1, f);
chm->n = ntohl(chm->n);
fread(&(chm->m), sizeof(cmph_uint32), 1, f); fread(&(chm->m), sizeof(cmph_uint32), 1, f);
chm->m = ntohl(chm->m);
chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n); chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n);
fread(chm->g, chm->n*sizeof(cmph_uint32), 1, f); fread(chm->g, chm->n*sizeof(cmph_uint32), 1, f);
for (i = 0; i < chm->n; ++i) chm->g[i] = ntohl(chm->g[i]);
#ifdef DEBUG #ifdef DEBUG
fprintf(stderr, "G: "); fprintf(stderr, "G: ");
for (i = 0; i < chm->n; ++i) fprintf(stderr, "%u ", chm->g[i]); for (i = 0; i < chm->n; ++i) fprintf(stderr, "%u ", chm->g[i]);

View File

@ -6,7 +6,7 @@
typedef struct __chm_data_t chm_data_t; typedef struct __chm_data_t chm_data_t;
typedef struct __chm_config_data_t chm_config_data_t; typedef struct __chm_config_data_t chm_config_data_t;
chm_config_data_t *chm_config_new(cmph_io_adapter_t *key_source); chm_config_data_t *chm_config_new();
void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void chm_config_destroy(cmph_config_t *mph); void chm_config_destroy(cmph_config_t *mph);
cmph_t *chm_new(cmph_config_t *mph, float c); cmph_t *chm_new(cmph_config_t *mph, float c);

View File

@ -98,11 +98,37 @@ cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
mph = __config_new(key_source); mph = __config_new(key_source);
assert(mph); assert(mph);
mph->algo = CMPH_CHM; // default value mph->algo = CMPH_CHM; // default value
mph->data = chm_config_new();
return mph; return mph;
} }
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{ {
if (algo != mph->algo)
{
switch (mph->algo)
{
case CMPH_CHM:
chm_config_destroy(mph->data);
break;
case CMPH_BMZ:
bmz_config_destroy(mph->data);
break;
default:
assert(0);
}
switch(algo)
{
case CMPH_CHM:
mph->data = chm_config_new();
break;
case CMPH_BMZ:
mph->data = bmz_config_new();
break;
default:
assert(0);
}
}
mph->algo = algo; mph->algo = algo;
} }
@ -159,13 +185,11 @@ cmph_t *cmph_new(cmph_config_t *mph)
{ {
case CMPH_CHM: case CMPH_CHM:
DEBUGP("Creating chm hash\n"); DEBUGP("Creating chm hash\n");
mph->data = chm_config_new(mph->key_source);
if (c == 0) c = 2.09; if (c == 0) c = 2.09;
mphf = chm_new(mph, c); mphf = chm_new(mph, c);
break; break;
case CMPH_BMZ: /* included -- Fabiano */ case CMPH_BMZ: /* included -- Fabiano */
DEBUGP("Creating bmz hash\n"); DEBUGP("Creating bmz hash\n");
mph->data = bmz_config_new(mph->key_source);
if (c == 0) c = 1.15; if (c == 0) c = 1.15;
mphf = bmz_new(mph, c); mphf = bmz_new(mph, c);
break; break;

View File

@ -12,6 +12,7 @@ cmph_config_t *__config_new(cmph_io_adapter_t *key_source)
if (mph == NULL) return NULL; if (mph == NULL) return NULL;
mph->key_source = key_source; mph->key_source = key_source;
mph->verbosity = 0; mph->verbosity = 0;
mph->data = NULL;
float c = 0; float c = 0;
return mph; return mph;
} }
@ -23,9 +24,8 @@ void __config_destroy(cmph_config_t *mph)
void __cmph_dump(cmph_t *mphf, FILE *fd) void __cmph_dump(cmph_t *mphf, FILE *fd)
{ {
cmph_uint32 nsize = htonl(mphf->size);
fwrite(cmph_names[mphf->algo], (cmph_uint32)(strlen(cmph_names[mphf->algo]) + 1), 1, fd); fwrite(cmph_names[mphf->algo], (cmph_uint32)(strlen(cmph_names[mphf->algo]) + 1), 1, fd);
fwrite(&nsize, sizeof(mphf->size), 1, fd); fwrite(&(mphf->size), sizeof(mphf->size), 1, fd);
} }
cmph_t *__cmph_load(FILE *f) cmph_t *__cmph_load(FILE *f)
{ {
@ -58,7 +58,6 @@ cmph_t *__cmph_load(FILE *f)
mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf = (cmph_t *)malloc(sizeof(cmph_t));
mphf->algo = algo; mphf->algo = algo;
fread(&(mphf->size), sizeof(mphf->size), 1, f); fread(&(mphf->size), sizeof(mphf->size), 1, f);
mphf->size = ntohl(mphf->size);
mphf->data = NULL; mphf->data = NULL;
DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size); DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size);

View File

@ -14,7 +14,7 @@
#endif #endif
#endif #endif
#ifdef WIN32 #ifndef __GNUC__
#ifndef __DEBUG_H__ #ifndef __DEBUG_H__
#define __DEBUG_H__ #define __DEBUG_H__
#include <stdarg.h> #include <stdarg.h>
@ -39,13 +39,13 @@ static void dummyprintf(const char *format, ...)
#endif #endif
#ifdef DEBUG #ifdef DEBUG
#ifdef WIN32 #ifndef __GNUC__
#define DEBUGP debugprintf #define DEBUGP debugprintf
#else #else
#define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0) #define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0)
#endif #endif
#else #else
#ifdef WIN32 #ifndef __GNUC__
#define DEBUGP dummyprintf #define DEBUGP dummyprintf
#else #else
#define DEBUGP(args...) #define DEBUGP(args...)

View File

@ -1,4 +1,8 @@
#ifdef WIN32
#include "../wingetopt.h" #include "../wingetopt.h"
#else
#include <getopt.h>
#endif
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>