From 2cfffbcc9df4f2e27147369ff9b62638719430b4 Mon Sep 17 00:00:00 2001 From: fc_botelho Date: Wed, 18 Mar 2009 22:08:46 +0000 Subject: [PATCH] *** empty log message *** --- configure.ac | 2 +- src/chd_ph.c | 4 ++-- src/main.c | 9 +++++---- tests/packed_mphf_tests.c | 27 +++++++++++++++++++++------ 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/configure.ac b/configure.ac index 2ebde34..71b0a07 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,6 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(Makefile.am) -AM_INIT_AUTOMAKE(cmph, 0.8) +AM_INIT_AUTOMAKE(cmph, 0.9) AM_CONFIG_HEADER(config.h) dnl Checks for programs. diff --git a/src/chd_ph.c b/src/chd_ph.c index 47d44fd..fe3511d 100644 --- a/src/chd_ph.c +++ b/src/chd_ph.c @@ -179,7 +179,7 @@ void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket) { assert(mph); chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data; - if(keys_per_bucket <= 1 || keys_per_bucket >= 15) + if(keys_per_bucket < 1 || keys_per_bucket >= 15) { keys_per_bucket = 4; } @@ -542,7 +542,7 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c) if(mph->verbosity && chd_ph->keys_per_bin == 1) { - fprintf(stderr, "space lower bound is %.3f bits per key", chd_ph_space_lower_bound(chd_ph->m, chd_ph->n)); + fprintf(stderr, "space lower bound is %.3f bits per key\n", chd_ph_space_lower_bound(chd_ph->m, chd_ph->n)); } // We allocate the working tables diff --git a/src/main.c b/src/main.c index 658ee09..a8aa9ea 100644 --- a/src/main.c +++ b/src/main.c @@ -82,7 +82,7 @@ int main(int argc, char **argv) cmph_io_adapter_t *source; cmph_uint32 memory_availability = 0; cmph_uint32 b = 0; - cmph_uint32 keys_per_bin = 0; + cmph_uint32 keys_per_bin = 1; while (1) { char ch = getopt(argc, argv, "hVvgc:k:a:M:b:t:f:m:d:s:"); @@ -299,7 +299,7 @@ int main(int argc, char **argv) return -1; } cmph_uint32 siz = cmph_size(mphf); - hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8)); + hashtable = (cmph_uint8*)calloc(siz, sizeof(cmph_uint8)); memset(hashtable, 0,(size_t) siz); //check all keys for (i = 0; i < source->nkeys; ++i) @@ -313,11 +313,12 @@ int main(int argc, char **argv) { fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf); ret = 1; - } else if(hashtable[h]) + } else if(hashtable[h] >= keys_per_bin) { + fprintf(stderr, "More than %u keys were mapped to bin %u\n", keys_per_bin, h); fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf); ret = 1; - } else hashtable[h] = 1; + } else hashtable[h]++; if (verbosity) { diff --git a/tests/packed_mphf_tests.c b/tests/packed_mphf_tests.c index 3bf161f..f800b97 100644 --- a/tests/packed_mphf_tests.c +++ b/tests/packed_mphf_tests.c @@ -22,15 +22,17 @@ void usage(const char *prg) { - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-t keys_per_bin] [-k nkeys] [-m file.mph] keysfile\n", prg); } void usage_long(const char *prg) { - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-t keys_per_bin] [-k nkeys] [-m file.mph] keysfile\n", prg); fprintf(stderr, "Packed MPHFs testing tool\n\n"); fprintf(stderr, " -h\t print this help message\n"); fprintf(stderr, " -V\t print version number and exit\n"); fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n"); + fprintf(stderr, " -t\t set the number of keys per bin for a t-perfect hashing function.\n"); + fprintf(stderr, " \t A t-perfect hashing function allows at most t collisions in a given bin.\n"); fprintf(stderr, " -k\t number of keys\n"); fprintf(stderr, " -m\t minimum perfect hash function file \n"); fprintf(stderr, " keysfile\t line separated file with keys\n"); @@ -47,9 +49,11 @@ int main(int argc, char **argv) cmph_uint32 i = 0; cmph_t *mphf = NULL; cmph_io_adapter_t *source; + cmph_uint32 keys_per_bin = 1; + while (1) { - char ch = getopt(argc, argv, "hVvk:m:"); + char ch = getopt(argc, argv, "hVvt:k:m:"); if (ch == -1) break; switch (ch) { @@ -69,6 +73,16 @@ int main(int argc, char **argv) case 'v': ++verbosity; break; + case 't': + { + char *cptr; + keys_per_bin = strtoul(optarg, &cptr, 10); + if(*cptr != 0) { + fprintf(stderr, "Parameter t was not found: %s\n", optarg); + exit(1); + } + } + break; case 'V': printf("%s\n", VERSION); return 0; @@ -124,7 +138,7 @@ int main(int argc, char **argv) return -1; } cmph_uint32 siz = cmph_size(mphf); - hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8)); + hashtable = (cmph_uint8*)calloc(siz, sizeof(cmph_uint8)); memset(hashtable, 0, (size_t)siz); // packing the function @@ -152,11 +166,12 @@ int main(int argc, char **argv) { fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf); ret = 1; - } else if(hashtable[h]) + } else if(hashtable[h] >= keys_per_bin) { + fprintf(stderr, "More than %u keys were mapped to bin %u\n", keys_per_bin, h); fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf); ret = 1; - } else hashtable[h] = 1; + } else hashtable[h]++; if (verbosity) {