diff --git a/BDZ.t2t b/BDZ.t2t index 68fc11b..e0b2a45 100755 --- a/BDZ.t2t +++ b/BDZ.t2t @@ -6,26 +6,33 @@ BDZ Algorithm ---------------------------------------- ==Introduction== +Coming soon... ---------------------------------------- ==The Algorithm== +Coming soon... + ---------------------------------------- ===Mapping Step=== +Coming soon... ---------------------------------------- ===Assigning Step=== +Coming soon... + ---------------------------------------- ===Ranking Step=== +Coming soon... ---------------------------------------- diff --git a/EXAMPLES.t2t b/EXAMPLES.t2t new file mode 100644 index 0000000..8f523bf --- /dev/null +++ b/EXAMPLES.t2t @@ -0,0 +1,135 @@ +CMPH - Examples + + +%!includeconf: CONFIG.t2t + +Using cmph is quite simple. Take a look at the following examples. + +------------------------------------------------------------------- + +``` +#include <cmph.h> +#include <string.h> +// Create minimal perfect hash function from in-memory vector +int main(int argc, char **argv) +{ + // Creating a filled vector + const char *vector[] = {"aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc", "dddddddddd", "eeeeeeeeee", + "ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"}; + unsigned int nkeys = 10; + // Source of keys + cmph_io_adapter_t *source = cmph_io_vector_adapter((char **)vector, nkeys); + + //Create minimal perfect hash function using the default (chm) algorithm. + cmph_config_t *config = cmph_config_new(source); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + + //Find key + const char *key = "jjjjjjjjjj"; + unsigned int id = cmph_search(hash, key, strlen(key)); + fprintf(stderr, "Id:%u\n", id); + //Destroy hash + cmph_destroy(hash); + cmph_io_vector_adapter_destroy(source); + return 0; +} +``` +Download [vector_adapter_ex1.c examples/vector_adapter_ex1.c]. This example does not work in versions below 0.3. 
+------------------------------- + +``` +#include <cmph.h> +#include <stdio.h> +// Create minimal perfect hash function from in-memory vector + +#pragma pack(1) +typedef struct { + cmph_uint32 id; + char key[11]; + cmph_uint32 year; +} rec_t; +#pragma pack(0) + +int main(int argc, char **argv) +{ + // Creating a filled vector + unsigned int i = 0; + rec_t vector[10] = {{1, "aaaaaaaaaa", 1999}, {2, "bbbbbbbbbb", 2000}, {3, "cccccccccc", 2001}, + {4, "dddddddddd", 2002}, {5, "eeeeeeeeee", 2003}, {6, "ffffffffff", 2004}, + {7, "gggggggggg", 2005}, {8, "hhhhhhhhhh", 2006}, {9, "iiiiiiiiii", 2007}, + {10,"jjjjjjjjjj", 2008}}; + unsigned int nkeys = 10; + FILE* mphf_fd = fopen("temp_struct_vector.mph", "w"); + // Source of keys + cmph_io_adapter_t *source = cmph_io_struct_vector_adapter(vector, sizeof(rec_t), sizeof(cmph_uint32), 11, nkeys); + + //Create minimal perfect hash function using the BDZ algorithm. + cmph_config_t *config = cmph_config_new(source); + cmph_config_set_algo(config, CMPH_BDZ); + cmph_config_set_mphf_fd(config, mphf_fd); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + cmph_dump(hash, mphf_fd); + cmph_destroy(hash); + fclose(mphf_fd); + + //Find key + mphf_fd = fopen("temp_struct_vector.mph", "r"); + hash = cmph_load(mphf_fd); + while (i < nkeys) { + const char *key = vector[i].key; + unsigned int id = cmph_search(hash, key, 11); + fprintf(stderr, "key:%s -- hash:%u\n", key, id); + i++; + } + + //Destroy hash + cmph_destroy(hash); + cmph_io_vector_adapter_destroy(source); + fclose(mphf_fd); + return 0; +} +``` +Download [struct_vector_adapter_ex3.c examples/struct_vector_adapter_ex3.c]. This example does not work in versions below 0.7. 
+------------------------------- + +``` +#include <cmph.h> +#include <stdio.h> +#include <string.h> + // Create minimal perfect hash function from in-disk keys using BMZ algorithm +int main(int argc, char **argv) +{ + //Open file with newline separated list of keys + FILE * keys_fd = fopen("keys.txt", "r"); + cmph_t *hash = NULL; + if (keys_fd == NULL) + { + fprintf(stderr, "File \"keys.txt\" not found\n"); + exit(1); + } + // Source of keys + cmph_io_adapter_t *source = cmph_io_nlfile_adapter(keys_fd); + + cmph_config_t *config = cmph_config_new(source); + cmph_config_set_algo(config, CMPH_BMZ); + hash = cmph_new(config); + cmph_config_destroy(config); + + //Find key + const char *key = "jjjjjjjjjj"; + unsigned int id = cmph_search(hash, key, strlen(key)); + fprintf(stderr, "Id:%u\n", id); + //Destroy hash + cmph_destroy(hash); + cmph_io_nlfile_adapter_destroy(source); + fclose(keys_fd); + return 0; +} +``` +Download [file_adapter_ex2.c examples/file_adapter_ex2.c] and [keys.txt examples/keys.txt] + +%!include: ALGORITHMS.t2t + +%!include: FOOTER.t2t diff --git a/NEWSLOG.t2t b/NEWSLOG.t2t index 25ad2ce..772757a 100644 --- a/NEWSLOG.t2t +++ b/NEWSLOG.t2t @@ -10,7 +10,7 @@ News Log - [An algorithm to generate MPHFs that require around 2.6 bits per key to be stored bdz.html], which is referred to as BDZ algorithm. The algorithm is the fastest one available in the literature for sets that can be treated in internal memory. - [An algorithm to generate PHFs with range m = cn, for c > 1.22 bdz.html], which is referred to as BDZ_PH algorithm. It is actually the BDZ algorithm without the ranking step. The resulting functions can be stored in 1.95 bits per key for //c = 1.23// and are considerably faster than the MPHFs generated by the BDZ algorithm. - An adapter to support a vector of struct as the source of keys has been added. -- An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. 
The computation of the packed function is still faster and can be easily mmapped. +- An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of a packed function is still faster and can be easily mmapped. - The hash functions djb2, fnv and sdbm were removed because they do not use random seeds and therefore are not useful for MPHFs algorithms. - All reported bugs and suggestions have been corrected and included as well. diff --git a/README.t2t b/README.t2t index a2fb75c..71cf01e 100644 --- a/README.t2t +++ b/README.t2t @@ -83,7 +83,7 @@ The CMPH Library encapsulates the newest and more efficient algorithms in an eas - [An algorithm to generate MPHFs that require around 2.6 bits per key to be stored bdz.html], which is referred to as BDZ algorithm. The algorithm is the fastest one available in the literature for sets that can be treated in internal memory. - [An algorithm to generate PHFs with range m = cn, for c > 1.22 bdz.html], which is referred to as BDZ_PH algorithm. It is actually the BDZ algorithm without the ranking step. The resulting functions can be stored in 1.95 bits per key for //c = 1.23// and are considerably faster than the MPHFs generated by the BDZ algorithm. - An adapter to support a vector of struct as the source of keys has been added. -- An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of the packed function is still faster and can be easily mmapped. +- An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of a packed function is still faster and can be easily mmapped. - The hash functions djb2, fnv and sdbm were removed because they do not use random seeds and therefore are not useful for MPHFs algorithms. - All reported bugs and suggestions have been corrected and included as well. 
diff --git a/gendocs b/gendocs index e8f34d0..0fc609d 100755 --- a/gendocs +++ b/gendocs @@ -11,6 +11,7 @@ txt2tags -t html -i GPERF.t2t -o gperf.html txt2tags -t html -i FAQ.t2t -o faq.html txt2tags -t html -i CONCEPTS.t2t -o concepts.html txt2tags -t html -i NEWSLOG.t2t -o newslog.html +txt2tags -t html -i EXAMPLES.t2t -o examples.html txt2tags -t txt --mask-email -i README.t2t -o README txt2tags -t txt -i BDZ.t2t -o BDZ @@ -23,3 +24,4 @@ txt2tags -t txt -i GPERF.t2t -o GPERF txt2tags -t txt -i FAQ.t2t -o FAQ txt2tags -t txt -i CONCEPTS.t2t -o CONCEPTS txt2tags -t txt -i NEWSLOG.t2t -o NEWSLOG +txt2tags -t txt -i EXAMPLES.t2t -o EXAMPLES