Fix to alternate hash functions code. Removed htonl stuff from chm algorithm. Added faq.
This commit is contained in:
parent
928f088348
commit
71a55f697e
27
FAQ.t2t
Normal file
27
FAQ.t2t
Normal file
@ -0,0 +1,27 @@
|
||||
CMPH FAQ
|
||||
|
||||
|
||||
|
||||
- How do I define the ids of the keys?
|
||||
- You don't. The ids will be assigned by the algorithm creating the minimal
|
||||
perfect hash function. If the algorithm creates an **ordered** minimal
|
||||
perfect hash function, the ids will be the indices of the keys in the
|
||||
input. Otherwise, you have no guarantee of the distribution of the ids.
|
||||
|
||||
- Why do I always get the error "Unable to create minimum perfect hashing function"?
|
||||
- The algorithms do not guarantee that a minimal perfect hash function can
|
||||
be created. In practice, it will always work if your input
|
||||
is big enough (>100 keys).
|
||||
The error is most likely caused by duplicated
|
||||
keys in the input. You must guarantee that the keys are unique in the
|
||||
input. If you are using a UN*X based OS, try doing
|
||||
``` #sort input.txt | uniq > input_uniq.txt
|
||||
and run cmph with input_uniq.txt.
|
||||
|
||||
----------------------------------------
|
||||
[Home index.html]
|
||||
----------------------------------------
|
||||
|
||||
Davi de Castro Reis
|
||||
|
||||
Fabiano Cupertino Botelho
|
@ -159,6 +159,10 @@ utility.
|
||||
keysfile line separated file with keys
|
||||
```
|
||||
|
||||
**Additional Documentation**
|
||||
|
||||
[FAQ faq.html]
|
||||
|
||||
**Downloads**
|
||||
|
||||
Use the project page at SourceForge: http://sf.net/projects/cmph
|
||||
@ -171,9 +175,9 @@ Code is under the LGPL.
|
||||
|
||||
Enjoy!
|
||||
|
||||
Davi de Castro Reis
|
||||
Davi de Castro Reis davi@users.sourceforge.net
|
||||
|
||||
Fabiano Cupertino Botelho
|
||||
Fabiano Cupertino Botelho fc_botelho@users.sourceforge.net
|
||||
|
||||
%!include(html): ''LOGO.html''
|
||||
Last Updated: %%date(%c)
|
||||
|
11
gendocs
11
gendocs
@ -1,6 +1,13 @@
|
||||
txt2tags -t html -i README.t2t -o index.html
|
||||
txt2tags -t html --mask-email -i README.t2t -o index.html
|
||||
txt2tags -t html -i BMZ.t2t -o bmz.html
|
||||
txt2tags -t html -i CHM.t2t -o chm.html
|
||||
txt2tags -t html -i COMPARISON.t2t -o comparison.html
|
||||
txt2tags -t html -i GPERF.t2t -o gperf.html
|
||||
txt2tags -t txt -i README.t2t -o README
|
||||
txt2tags -t html -i FAQ.t2t -o faq.html
|
||||
|
||||
txt2tags -t txt --mask-email -i README.t2t -o README
|
||||
txt2tags -t txt -i BMZ.t2t -o BMZ
|
||||
txt2tags -t txt -i CHM.t2t -o CHM
|
||||
txt2tags -t txt -i COMPARISON.t2t -o COMPARISON
|
||||
txt2tags -t txt -i GPERF.t2t -o GPERF
|
||||
txt2tags -t txt -i FAQ.t2t -o FAQ
|
||||
|
@ -27,7 +27,7 @@ static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint3
|
||||
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||
static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * used_edges, cmph_uint8 * visited);
|
||||
|
||||
bmz_config_data_t *bmz_config_new(cmph_io_adapter_t *key_source)
|
||||
bmz_config_data_t *bmz_config_new()
|
||||
{
|
||||
bmz_config_data_t *bmz = NULL;
|
||||
bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));
|
||||
|
@ -6,7 +6,7 @@
|
||||
typedef struct __bmz_data_t bmz_data_t;
|
||||
typedef struct __bmz_config_data_t bmz_config_data_t;
|
||||
|
||||
bmz_config_data_t *bmz_config_new(cmph_io_adapter_t *key_source);
|
||||
bmz_config_data_t *bmz_config_new();
|
||||
void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||
void bmz_config_destroy(cmph_config_t *mph);
|
||||
cmph_t *bmz_new(cmph_config_t *mph, float c);
|
||||
|
38
src/chm.c
38
src/chm.c
@ -10,20 +10,14 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <netinet/in.h>
|
||||
|
||||
//#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
/* static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */
|
||||
/* #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) */
|
||||
/* #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) */
|
||||
/* #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) */
|
||||
|
||||
static int chm_gen_edges(cmph_config_t *mph);
|
||||
static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint32 v);
|
||||
|
||||
chm_config_data_t *chm_config_new(cmph_io_adapter_t *key_source)
|
||||
chm_config_data_t *chm_config_new()
|
||||
{
|
||||
chm_config_data_t *chm = NULL;
|
||||
chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
|
||||
@ -173,7 +167,7 @@ static int chm_gen_edges(cmph_config_t *mph)
|
||||
chm_config_data_t *chm = (chm_config_data_t *)mph->data;
|
||||
int cycles = 0;
|
||||
|
||||
DEBUGP("Generating edges for %u vertices\n", chm->n);
|
||||
DEBUGP("Generating edges for %u vertices with hash functions %s and %s\n", chm->n, cmph_hash_names[chm->hashfuncs[0]], cmph_hash_names[chm->hashfuncs[1]]);
|
||||
graph_clear_edges(chm->graph);
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
for (e = 0; e < mph->key_source->nkeys; ++e)
|
||||
@ -206,39 +200,28 @@ int chm_dump(cmph_t *mphf, FILE *fd)
|
||||
{
|
||||
char *buf = NULL;
|
||||
cmph_uint32 buflen;
|
||||
cmph_uint32 nbuflen;
|
||||
cmph_uint32 i;
|
||||
cmph_uint32 two = htonl(2); //number of hash functions
|
||||
cmph_uint32 two = 2; //number of hash functions
|
||||
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||
cmph_uint32 nn, nm;
|
||||
__cmph_dump(mphf, fd);
|
||||
|
||||
fwrite(&two, sizeof(cmph_uint32), 1, fd);
|
||||
|
||||
hash_state_dump(data->hashes[0], &buf, &buflen);
|
||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||
nbuflen = htonl(buflen);
|
||||
fwrite(&nbuflen, sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(buf, buflen, 1, fd);
|
||||
free(buf);
|
||||
|
||||
hash_state_dump(data->hashes[1], &buf, &buflen);
|
||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||
nbuflen = htonl(buflen);
|
||||
fwrite(&nbuflen, sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(&buflen, sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(buf, buflen, 1, fd);
|
||||
free(buf);
|
||||
|
||||
nn = htonl(data->n);
|
||||
fwrite(&nn, sizeof(cmph_uint32), 1, fd);
|
||||
nm = htonl(data->m);
|
||||
fwrite(&nm, sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(&(data->n), sizeof(cmph_uint32), 1, fd);
|
||||
fwrite(&(data->m), sizeof(cmph_uint32), 1, fd);
|
||||
|
||||
for (i = 0; i < data->n; ++i)
|
||||
{
|
||||
cmph_uint32 ng = htonl(data->g[i]);
|
||||
fwrite(&ng, sizeof(cmph_uint32), 1, fd);
|
||||
}
|
||||
fwrite(data->g, sizeof(cmph_uint32)*data->n, 1, fd);
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", data->g[i]);
|
||||
@ -260,7 +243,6 @@ void chm_load(FILE *f, cmph_t *mphf)
|
||||
DEBUGP("Loading chm mphf\n");
|
||||
mphf->data = chm;
|
||||
fread(&nhashes, sizeof(cmph_uint32), 1, f);
|
||||
nhashes = ntohl(nhashes);
|
||||
chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*(nhashes + 1));
|
||||
chm->hashes[nhashes] = NULL;
|
||||
DEBUGP("Reading %u hashes\n", nhashes);
|
||||
@ -268,7 +250,6 @@ void chm_load(FILE *f, cmph_t *mphf)
|
||||
{
|
||||
hash_state_t *state = NULL;
|
||||
fread(&buflen, sizeof(cmph_uint32), 1, f);
|
||||
buflen = ntohl(buflen);
|
||||
DEBUGP("Hash state has %u bytes\n", buflen);
|
||||
buf = (char *)malloc(buflen);
|
||||
fread(buf, buflen, 1, f);
|
||||
@ -279,13 +260,10 @@ void chm_load(FILE *f, cmph_t *mphf)
|
||||
|
||||
DEBUGP("Reading m and n\n");
|
||||
fread(&(chm->n), sizeof(cmph_uint32), 1, f);
|
||||
chm->n = ntohl(chm->n);
|
||||
fread(&(chm->m), sizeof(cmph_uint32), 1, f);
|
||||
chm->m = ntohl(chm->m);
|
||||
|
||||
chm->g = (cmph_uint32 *)malloc(sizeof(cmph_uint32)*chm->n);
|
||||
fread(chm->g, chm->n*sizeof(cmph_uint32), 1, f);
|
||||
for (i = 0; i < chm->n; ++i) chm->g[i] = ntohl(chm->g[i]);
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "G: ");
|
||||
for (i = 0; i < chm->n; ++i) fprintf(stderr, "%u ", chm->g[i]);
|
||||
|
@ -6,7 +6,7 @@
|
||||
typedef struct __chm_data_t chm_data_t;
|
||||
typedef struct __chm_config_data_t chm_config_data_t;
|
||||
|
||||
chm_config_data_t *chm_config_new(cmph_io_adapter_t *key_source);
|
||||
chm_config_data_t *chm_config_new();
|
||||
void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||
void chm_config_destroy(cmph_config_t *mph);
|
||||
cmph_t *chm_new(cmph_config_t *mph, float c);
|
||||
|
36
src/cmph.c
36
src/cmph.c
@ -98,12 +98,38 @@ cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
|
||||
mph = __config_new(key_source);
|
||||
assert(mph);
|
||||
mph->algo = CMPH_CHM; // default value
|
||||
mph->data = chm_config_new();
|
||||
return mph;
|
||||
}
|
||||
|
||||
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
||||
{
|
||||
mph->algo = algo;
|
||||
if (algo != mph->algo)
|
||||
{
|
||||
switch (mph->algo)
|
||||
{
|
||||
case CMPH_CHM:
|
||||
chm_config_destroy(mph->data);
|
||||
break;
|
||||
case CMPH_BMZ:
|
||||
bmz_config_destroy(mph->data);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
switch(algo)
|
||||
{
|
||||
case CMPH_CHM:
|
||||
mph->data = chm_config_new();
|
||||
break;
|
||||
case CMPH_BMZ:
|
||||
mph->data = bmz_config_new();
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
mph->algo = algo;
|
||||
}
|
||||
|
||||
void cmph_config_destroy(cmph_config_t *mph)
|
||||
@ -115,7 +141,7 @@ void cmph_config_destroy(cmph_config_t *mph)
|
||||
chm_config_destroy(mph);
|
||||
break;
|
||||
case CMPH_BMZ: /* included -- Fabiano */
|
||||
bmz_config_destroy(mph);
|
||||
bmz_config_destroy(mph);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@ -159,13 +185,11 @@ cmph_t *cmph_new(cmph_config_t *mph)
|
||||
{
|
||||
case CMPH_CHM:
|
||||
DEBUGP("Creating chm hash\n");
|
||||
mph->data = chm_config_new(mph->key_source);
|
||||
if (c == 0) c = 2.09;
|
||||
mphf = chm_new(mph, c);
|
||||
break;
|
||||
case CMPH_BMZ: /* included -- Fabiano */
|
||||
DEBUGP("Creating bmz hash\n");
|
||||
mph->data = bmz_config_new(mph->key_source);
|
||||
if (c == 0) c = 1.15;
|
||||
mphf = bmz_new(mph, c);
|
||||
break;
|
||||
@ -205,8 +229,8 @@ cmph_t *cmph_load(FILE *f)
|
||||
chm_load(f, mphf);
|
||||
break;
|
||||
case CMPH_BMZ: /* included -- Fabiano */
|
||||
DEBUGP("Loading bmz algorithm dependent parts\n");
|
||||
bmz_load(f, mphf);
|
||||
DEBUGP("Loading bmz algorithm dependent parts\n");
|
||||
bmz_load(f, mphf);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
@ -12,6 +12,7 @@ cmph_config_t *__config_new(cmph_io_adapter_t *key_source)
|
||||
if (mph == NULL) return NULL;
|
||||
mph->key_source = key_source;
|
||||
mph->verbosity = 0;
|
||||
mph->data = NULL;
|
||||
float c = 0;
|
||||
return mph;
|
||||
}
|
||||
@ -23,9 +24,8 @@ void __config_destroy(cmph_config_t *mph)
|
||||
|
||||
void __cmph_dump(cmph_t *mphf, FILE *fd)
|
||||
{
|
||||
cmph_uint32 nsize = htonl(mphf->size);
|
||||
fwrite(cmph_names[mphf->algo], (cmph_uint32)(strlen(cmph_names[mphf->algo]) + 1), 1, fd);
|
||||
fwrite(&nsize, sizeof(mphf->size), 1, fd);
|
||||
fwrite(&(mphf->size), sizeof(mphf->size), 1, fd);
|
||||
}
|
||||
cmph_t *__cmph_load(FILE *f)
|
||||
{
|
||||
@ -58,7 +58,6 @@ cmph_t *__cmph_load(FILE *f)
|
||||
mphf = (cmph_t *)malloc(sizeof(cmph_t));
|
||||
mphf->algo = algo;
|
||||
fread(&(mphf->size), sizeof(mphf->size), 1, f);
|
||||
mphf->size = ntohl(mphf->size);
|
||||
mphf->data = NULL;
|
||||
DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size);
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
#ifndef __GNUC__
|
||||
#ifndef __DEBUG_H__
|
||||
#define __DEBUG_H__
|
||||
#include <stdarg.h>
|
||||
@ -39,13 +39,13 @@ static void dummyprintf(const char *format, ...)
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifdef WIN32
|
||||
#ifndef __GNUC__
|
||||
#define DEBUGP debugprintf
|
||||
#else
|
||||
#define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0)
|
||||
#endif
|
||||
#else
|
||||
#ifdef WIN32
|
||||
#ifndef __GNUC__
|
||||
#define DEBUGP dummyprintf
|
||||
#else
|
||||
#define DEBUGP(args...)
|
||||
|
@ -1,4 +1,8 @@
|
||||
#ifdef WIN32
|
||||
#include "../wingetopt.h"
|
||||
#else
|
||||
#include <getopt.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
@ -218,7 +222,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
else
|
||||
{
|
||||
cmph_uint8 * hashtable = NULL;
|
||||
cmph_uint8 * hashtable = NULL;
|
||||
mphf_fd = fopen(mphf_file, "r");
|
||||
if (mphf_fd == NULL)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user