2004-12-23 15:16:30 +02:00
|
|
|
#include "cmph.h"
|
|
|
|
#include "cmph_structs.h"
|
|
|
|
#include "czech.h"
|
|
|
|
#include "bmz.h"
|
|
|
|
//#include "bmz.h" /* included -- Fabiano */
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
//#define DEBUG
|
|
|
|
#include "debug.h"
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Printable algorithm names, terminated by a NULL entry.
 * The debug messages in this file index the table with a config's `algo`
 * value, so the order presumably has to match the CMPH_ALGO enumerators --
 * verify against the enum declaration before reordering or adding entries.
 */
const char *cmph_names[] = { "bmz", "czech", NULL }; /* included -- Fabiano */
|
2004-12-23 15:16:30 +02:00
|
|
|
|
2005-01-24 22:25:58 +02:00
|
|
|
/*
 * Read the next newline-terminated key from the stream passed in `data`.
 *
 * data   - the FILE * to read from, passed as an opaque pointer.
 * key    - receives a heap buffer holding the key, NUL-terminated and with
 *          the trailing '\n' removed; the buffer is released with free().
 *          Set to NULL when the function fails.
 * keylen - receives the key length in bytes, not counting the terminator;
 *          set to 0 when the function fails.
 *
 * Lines longer than BUFSIZ - 1 bytes are assembled from several fgets()
 * chunks. Returns the key length on success and -1 on end of file, read
 * error or allocation failure. A final line that lacks a terminating
 * newline is not reported as a key (it yields -1 as well).
 */
static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
{
	FILE *fd = (FILE *)data;
	*key = NULL;
	*keylen = 0;
	while (1)
	{
		char buf[BUFSIZ];
		size_t chunk;
		char *grown;

		/* End of input, read error, or an unterminated last line. */
		if (fgets(buf, BUFSIZ, fd) == NULL) goto fail;
		if (feof(fd)) goto fail;

		chunk = strlen(buf);
		/* A chunk that starts with a NUL byte looks empty; never index buf[-1]. */
		if (chunk == 0) continue;

		/* Keep the old buffer reachable so it can be freed if realloc fails. */
		grown = (char *)realloc(*key, *keylen + chunk + 1);
		if (grown == NULL) goto fail;
		*key = grown;
		memcpy(*key + *keylen, buf, chunk);
		*keylen += (cmph_uint32)chunk;

		/* No newline yet: the line continues in the next chunk. */
		if (buf[chunk - 1] == '\n') break;
	}
	/* Replace the trailing newline with the string terminator. */
	if ((*keylen) && (*key)[*keylen - 1] == '\n')
	{
		(*key)[(*keylen) - 1] = 0;
		--(*keylen);
	}
	return (int)*keylen;

fail:
	/* Drop any partially assembled key instead of leaking it. */
	free(*key);
	*key = NULL;
	*keylen = 0;
	return -1;
}
|
|
|
|
|
|
|
|
/*
 * Release a key buffer with free().
 * `data` and `keylen` are not used by this implementation.
 */
static void key_nlfile_dispose(void *data, char *key, cmph_uint32 keylen)
{
	(void)data;
	(void)keylen;
	free(key);
}
|
|
|
|
/*
 * Reposition the key stream at its beginning.
 * `data` is the FILE * of the key file, passed as an opaque pointer.
 */
static void key_nlfile_rewind(void *data)
{
	rewind((FILE *)data);
}
|
|
|
|
|
|
|
|
/*
 * Count the newline-terminated lines in `fd`.
 *
 * The stream is rewound before counting and again before returning.
 * A line longer than BUFSIZ - 1 bytes is read in several chunks and is
 * counted once, on the chunk that ends in '\n'. A final line without a
 * terminating newline is not counted.
 *
 * Returns the number of lines counted.
 */
static cmph_uint32 count_nlfile_keys(FILE *fd)
{
	cmph_uint32 count = 0;
	rewind(fd);
	while (1)
	{
		char buf[BUFSIZ];
		size_t len;

		/* NULL means end of file or a read error; buf is not usable then. */
		if (fgets(buf, BUFSIZ, fd) == NULL) break;
		/* The data ran out before a newline: unterminated last line. */
		if (feof(fd)) break;

		len = strlen(buf);
		/* Empty-looking chunk (leading NUL byte) or a partial long line. */
		if (len == 0 || buf[len - 1] != '\n') continue;
		++count;
	}
	rewind(fd);
	return count;
}
|
|
|
|
|
|
|
|
cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd)
|
|
|
|
{
|
|
|
|
cmph_io_adapter_t * key_source = malloc(sizeof(cmph_io_adapter_t));
|
|
|
|
assert(key_source);
|
|
|
|
key_source->data = (void *)keys_fd;
|
|
|
|
key_source->nkeys = count_nlfile_keys(keys_fd);
|
|
|
|
key_source->read = key_nlfile_read;
|
|
|
|
key_source->dispose = key_nlfile_dispose;
|
|
|
|
key_source->rewind = key_nlfile_rewind;
|
|
|
|
return key_source;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Build a key-source adapter on top of a newline-delimited key file when
 * the caller already knows the number of keys.
 *
 * `nkeys` is stored as given; the file is not scanned. The adapter is
 * allocated with malloc(); an allocation failure trips an assert.
 */
cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = malloc(sizeof *adapter);
	assert(adapter);

	/* Callbacks shared by all newline-delimited file adapters. */
	adapter->read = key_nlfile_read;
	adapter->dispose = key_nlfile_dispose;
	adapter->rewind = key_nlfile_rewind;

	adapter->data = (void *)keys_fd;
	adapter->nkeys = nkeys;
	return adapter;
}
|
|
|
|
|
|
|
|
/*
 * Adapter over an in-memory vector of keys.
 * Not implemented: both arguments are ignored and NULL is always returned.
 */
cmph_io_adapter_t *cmph_io_vector_adapter(const char ** vector, cmph_uint32 nkeys)
{
	(void)vector;
	(void)nkeys;
	return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Create a configuration object for `key_source` via __config_new().
 * The algorithm is preset to CMPH_CZECH; a NULL result from __config_new()
 * trips an assert.
 */
cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
{
	cmph_config_t *config = __config_new(key_source);
	assert(config);
	config->algo = CMPH_CZECH; /* default value */
	return config;
}
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Select the algorithm for this configuration.
 * Only stores `algo` in the config; nothing else is updated here.
 */
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{
	mph->algo = algo;
}
|
|
|
|
|
|
|
|
/*
 * Destroy a configuration object.
 * Runs the destroy routine of the configured algorithm first, then the
 * generic __config_destroy(). An unknown algorithm trips an assert.
 */
void cmph_config_destroy(cmph_config_t *mph)
{
	DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);

	if (mph->algo == CMPH_CZECH)
	{
		czech_config_destroy(mph);
	}
	else if (mph->algo == CMPH_BMZ) /* included -- Fabiano */
	{
		bmz_config_destroy(mph);
	}
	else
	{
		assert(0);
	}

	__config_destroy(mph);
}
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Store the requested verbosity level in the configuration.
 */
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity)
{
	mph->verbosity = verbosity;
}
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Forward the hash function selection to the configured algorithm.
 * Does nothing when the algorithm is neither CMPH_CZECH nor CMPH_BMZ.
 */
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	if (mph->algo == CMPH_CZECH)
	{
		czech_config_set_hashfuncs(mph, hashfuncs);
	}
	else if (mph->algo == CMPH_BMZ) /* included -- Fabiano */
	{
		bmz_config_set_hashfuncs(mph, hashfuncs);
	}
}
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Store the constant `c` in the configuration.
 * cmph_new() treats a value of 0 as "use the algorithm's default".
 */
void cmph_config_set_graphsize(cmph_config_t *mph, float c)
{
	mph->c = c;
}
|
2004-12-23 15:16:30 +02:00
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Build a hash function from a configuration.
 *
 * Creates the algorithm-specific configuration from mph->key_source, stores
 * it in mph->data, and calls the algorithm's constructor. When mph->c is 0
 * a per-algorithm default is passed instead: 2.09 for czech, 1.15 for bmz.
 * An unknown algorithm trips an assert.
 *
 * Returns whatever the algorithm's constructor returns, or NULL if no
 * constructor ran.
 */
cmph_t *cmph_new(cmph_config_t *mph)
{
	cmph_t *result = NULL;
	float graph_c = mph->c;

	DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);

	if (mph->algo == CMPH_CZECH)
	{
		DEBUGP("Creating czech hash\n");
		mph->data = czech_config_new(mph->key_source);
		if (graph_c == 0) graph_c = 2.09;
		result = czech_new(mph, graph_c);
	}
	else if (mph->algo == CMPH_BMZ) /* included -- Fabiano */
	{
		DEBUGP("Creating bmz hash\n");
		mph->data = bmz_config_new(mph->key_source);
		if (graph_c == 0) graph_c = 1.15;
		result = bmz_new(mph, graph_c);
	}
	else
	{
		assert(0);
	}

	return result;
}
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Write a hash function to `f` using the dump routine of its algorithm.
 * Returns that routine's result. An unknown algorithm trips an assert and,
 * when asserts are compiled out, yields 0.
 */
int cmph_dump(cmph_t *mphf, FILE *f)
{
	if (mphf->algo == CMPH_CZECH)
	{
		return czech_dump(mphf, f);
	}
	if (mphf->algo == CMPH_BMZ) /* included -- Fabiano */
	{
		return bmz_dump(mphf, f);
	}

	/* Unknown algorithm. */
	assert(0);
	return 0;
}
|
2005-01-21 22:42:33 +02:00
|
|
|
cmph_t *cmph_load(FILE *f)
|
2004-12-23 15:16:30 +02:00
|
|
|
{
|
2005-01-21 22:42:33 +02:00
|
|
|
cmph_t *mphf = NULL;
|
2004-12-23 15:16:30 +02:00
|
|
|
DEBUGP("Loading mphf generic parts\n");
|
2005-01-21 22:42:33 +02:00
|
|
|
mphf = __cmph_load(f);
|
2004-12-23 15:16:30 +02:00
|
|
|
if (mphf == NULL) return NULL;
|
|
|
|
DEBUGP("Loading mphf algorithm dependent parts\n");
|
|
|
|
|
|
|
|
switch (mphf->algo)
|
|
|
|
{
|
2005-01-18 23:06:08 +02:00
|
|
|
case CMPH_CZECH:
|
2005-01-21 22:42:33 +02:00
|
|
|
czech_load(f, mphf);
|
2004-12-23 15:16:30 +02:00
|
|
|
break;
|
2005-01-18 23:06:08 +02:00
|
|
|
case CMPH_BMZ: /* included -- Fabiano */
|
2004-12-23 15:16:30 +02:00
|
|
|
DEBUGP("Loading bmz algorithm dependent parts\n");
|
2005-01-21 22:42:33 +02:00
|
|
|
bmz_load(f, mphf);
|
2004-12-23 15:16:30 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
}
|
|
|
|
DEBUGP("Loaded mphf\n");
|
|
|
|
return mphf;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Look up `key` (of `keylen` bytes) with the search routine of the hash
 * function's algorithm and return that routine's result. An unknown
 * algorithm trips an assert and, when asserts are compiled out, yields 0.
 */
cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	DEBUGP("mphf algorithm: %u \n", mphf->algo);

	if (mphf->algo == CMPH_CZECH)
	{
		return czech_search(mphf, key, keylen);
	}
	if (mphf->algo == CMPH_BMZ) /* included -- Fabiano */
	{
		DEBUGP("bmz algorithm search\n");
		return bmz_search(mphf, key, keylen);
	}

	/* Unknown algorithm. */
	assert(0);
	return 0;
}
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Return the `size` field of the hash function.
 */
cmph_uint32 cmph_size(cmph_t *mphf)
{
	const cmph_uint32 size = mphf->size;
	return size;
}
|
|
|
|
|
2005-01-21 22:42:33 +02:00
|
|
|
/*
 * Destroy a hash function with the destroy routine of its algorithm.
 * An unknown algorithm trips an assert.
 */
void cmph_destroy(cmph_t *mphf)
{
	if (mphf->algo == CMPH_CZECH)
	{
		czech_destroy(mphf);
		return;
	}
	if (mphf->algo == CMPH_BMZ) /* included -- Fabiano */
	{
		bmz_destroy(mphf);
		return;
	}

	/* Unknown algorithm. */
	assert(0);
}
|