diff --git a/ChangeLog b/ChangeLog index 5534600..79d6b1d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +2005-01-19 10:40 davi + + * acinclude.m4, configure.ac: Added macros for large file support. + +2005-01-18 19:06 fc_botelho + + * src/: bmz.c, bmz.h, bmz_structs.h, cmph.c, cmph.h, + cmph_structs.c, cmph_structs.h, cmph_types.h, czech.c, czech.h, + czech_structs.h, djb2_hash.c, djb2_hash.h, fnv_hash.c, fnv_hash.h, + graph.c, graph.h, hash.c, hash.h, hash_state.h, jenkins_hash.c, + jenkins_hash.h, main.c, sdbm_hash.c, sdbm_hash.h, vqueue.c, + vqueue.h, vstack.c, vstack.h: version with cmph prefix + +2005-01-18 15:10 davi + + * ChangeLog, cmph.vcproj, cmphapp.vcproj, wingetopt.c, wingetopt.h: + Added missing files. + +2005-01-18 14:25 fc_botelho + + * aclocal.m4: initial version + +2005-01-18 14:16 fc_botelho + + * aclocal.m4: initial version + +2005-01-18 13:58 fc_botelho + + * src/czech.c: using bit mask to represent boolean values + +2005-01-18 13:56 fc_botelho + + * src/czech.c: no message + 2005-01-18 10:18 davi * COPYING, INSTALL, src/Makefile.am, src/bmz.c, src/bmz.h, diff --git a/README b/README index e69de29..6a1cdd0 100644 --- a/README +++ b/README @@ -0,0 +1,147 @@ +== cmph - C Minimal Perfect Hashing Library == + +Description + +C Minimal Perfect Hashing Library is a portable LGPLed library to create and +work with minimal perfect hashes. The cmph library encapsulates the newest +and most efficient algorithms in the literature in an easy-to-use, +production-quality, fast API. The library is designed to work with big entries that +won't fit in the main memory. It has been used successfully to create hashes +bigger than 100 million entries. 
Although there is a lack of similar libraries +in the free software world, we can point out some of the "distinguishing" +features of cmph: + +- Fast +- Space-efficient with main memory usage carefully documented +- The best modern algorithms are available (or at least scheduled for implementation :-)) +- Object oriented implementation +- Works with in-disk key sets through use of the adapter pattern +- Serialization of hash functions +- Easily extensible +- Well encapsulated API aiming at binary compatibility through releases +- Free Software + +News for version 0.3 + +- New heuristics in bmz algorithm, providing hash creation with only + (0.93 * 16 + 4)*n bytes and hash query with (0.93*4)n bytes + +Examples + +Using cmph is quite easy. Take a look. + + + // Create minimal perfect hash from in-memory vector + #include <cmph.h> + ... + + const char **vector; + unsigned int nkeys; + //Fill vector + //... + + //Create minimal perfect hash + cmph_config_t *config = cmph_config_new(cmph_io_vector_adapter(vector, nkeys)); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + + //Find key + const char *key = "sample key"; + unsigned int id = cmph_search(hash, key); + + //Destroy hash + cmph_destroy(hash); + +------------------------------- + + + // Create minimal perfect hash from in-disk keys using BMZ algorithm + #include <cmph.h> + ... + + //Open file with newline separated list of keys + FILE *fd = fopen("keysfile_newline_separated", "r"); + //check for errors + //... + + cmph_config_t *config = cmph_config_new(cmph_io_nlfile_adapter(fd)); + cmph_config_set_algo(config, CMPH_BMZ); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + fclose(fd); + + //Find key + const char *key = "sample key"; + unsigned int id = cmph_search(hash, key); + + //Destroy hash + cmph_destroy(hash); + +-------------------------------------- + +The cmph application + +cmph is the name of both the library and the utility +application that comes with this package. 
You can use the cmph +application to create minimal perfect hashes from the command line. The cmph utility +comes with a number of flags, but it is very simple to create and query +minimal perfect hashes: + + + $ # Create mph for keys in file keys_file + $ ./cmph keys_file + $ # Query id of keys in the file keys_query + $ ./cmph -m keys_file.mph keys_query + + +The additional options let you set most of the parameters you have +available through the C API. Below you can see the full help message for the +utility. + + + usage: cmph [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile + Minimum perfect hashing tool + + -h print this help message + -c c value that determines the number of vertices in the graph + -a algorithm - valid values are + * czech + * bmz + -f hash function (may be used multiple times) - valid values are + * jenkins + * djb2 + * sdbm + * fnv + * glib + * pjw + -V print version number and exit + -v increase verbosity (may be used multiple times) + -k number of keys + -g generation mode + -s random seed + -m minimum perfect hash function file + keysfile line separated file with keys + + +Downloads + +Use the project page at sourceforge: http://sf.net/projects/cmph + +License Stuff + +Code is under the LGPL. + +---------------------------------------- + +Enjoy! + +Davi de Castro Reis + +Fabiano Cupertino Botelho + +Last Updated: Thu Jan 20 11:01:01 2005 + + + + + diff --git a/README.t2t b/README.t2t new file mode 100644 index 0000000..a40b32d --- /dev/null +++ b/README.t2t @@ -0,0 +1,149 @@ +== cmph - C Minimal Perfect Hashing Library == + + +**Description** + +C Minimal Perfect Hashing Library is a portable LGPLed library to create and +work with minimal perfect hashes. The cmph library encapsulates the newest +and most efficient algorithms in the literature in an easy-to-use, +production-quality, fast API. The library is designed to work with big entries that +won't fit in the main memory. 
It has been used successfully to create hashes +bigger than 100 million entries. Although there is a lack of similar libraries +in the free software world, we can point out some of the "distinguishing" +features of cmph: + +- Fast +- Space-efficient with main memory usage carefully documented +- The best modern algorithms are available (or at least scheduled for implementation :-)) +- Object oriented implementation +- Works with in-disk key sets through use of the adapter pattern +- Serialization of hash functions +- Easily extensible +- Well encapsulated API aiming at binary compatibility through releases +- Free Software + + +**News for version 0.3** + +- New heuristics in bmz algorithm, providing hash creation with only + (0.93 * 16 + 4)*n bytes and hash query with (0.93*4)n bytes + +**Examples** + +Using cmph is quite easy. Take a look. + + +``` + // Create minimal perfect hash from in-memory vector + #include <cmph.h> + ... + + const char **vector; + unsigned int nkeys; + //Fill vector + //... + + //Create minimal perfect hash + cmph_config_t *config = cmph_config_new(cmph_io_vector_adapter(vector, nkeys)); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + + //Find key + const char *key = "sample key"; + unsigned int id = cmph_search(hash, key); + + //Destroy hash + cmph_destroy(hash); +``` +------------------------------- + +``` + // Create minimal perfect hash from in-disk keys using BMZ algorithm + #include <cmph.h> + ... + + //Open file with newline separated list of keys + FILE *fd = fopen("keysfile_newline_separated", "r"); + //check for errors + //... 
+ + cmph_config_t *config = cmph_config_new(cmph_io_nlfile_adapter(fd)); + cmph_config_set_algo(config, CMPH_BMZ); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + fclose(fd); + + //Find key + const char *key = "sample key"; + unsigned int id = cmph_search(hash, key); + + //Destroy hash + cmph_destroy(hash); +``` +-------------------------------------- + + +**The cmph application** + +cmph is the name of both the library and the utility +application that comes with this package. You can use the cmph +application to create minimal perfect hashes from the command line. The cmph utility +comes with a number of flags, but it is very simple to create and query +minimal perfect hashes: + +``` + $ # Create mph for keys in file keys_file + $ ./cmph keys_file + $ # Query id of keys in the file keys_query + $ ./cmph -m keys_file.mph keys_query +``` + +The additional options let you set most of the parameters you have +available through the C API. Below you can see the full help message for the +utility. + + +``` + usage: cmph [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile + Minimum perfect hashing tool + + -h print this help message + -c c value that determines the number of vertices in the graph + -a algorithm - valid values are + * czech + * bmz + -f hash function (may be used multiple times) - valid values are + * jenkins + * djb2 + * sdbm + * fnv + * glib + * pjw + -V print version number and exit + -v increase verbosity (may be used multiple times) + -k number of keys + -g generation mode + -s random seed + -m minimum perfect hash function file + keysfile line separated file with keys +``` + +**Downloads** + +Use the project page at sourceforge: http://sf.net/projects/cmph + +**License Stuff** + +Code is under the LGPL. +---------------------------------------- + +Enjoy! 
+ +Davi de Castro Reis + +Fabiano Cupertino Botelho + + +%preproc(html): '^%html% ' '' +%html% SourceForge.net Logo +Last Updated: %%date(%c) diff --git a/src/main.c b/src/main.c index c556da1..fcc9d01 100644 --- a/src/main.c +++ b/src/main.c @@ -1,4 +1,4 @@ -#include +#include "../wingetopt.h" #include #include #include @@ -8,7 +8,6 @@ #include #include "cmph.h" #include "hash.h" -#include "../wingetopt.h" #ifdef WIN32 #define VERSION "0.2" diff --git a/wingetopt.h b/wingetopt.h index 4df5d78..9596853 100644 --- a/wingetopt.h +++ b/wingetopt.h @@ -16,8 +16,8 @@ extern "C" { int opterr = 1; /* non-zero if a question mark should be returned */ int getopt(int argc, char *argv[], char *opstring); - #endif -#endif + #endif //_GETOPT_ +#endif //WIN32 #ifdef __cplusplus }