1
Fork 0

*** empty log message ***

This commit is contained in:
fc_botelho 2008-03-30 00:59:30 +00:00
parent a5cdc7743f
commit 80c14026b4
5 changed files with 146 additions and 2 deletions

View File

@ -6,26 +6,33 @@ BDZ Algorithm
---------------------------------------- ----------------------------------------
==Introduction== ==Introduction==
Coming soon...
---------------------------------------- ----------------------------------------
==The Algorithm== ==The Algorithm==
Coming soon...
---------------------------------------- ----------------------------------------
===Mapping Step=== ===Mapping Step===
Coming soon...
---------------------------------------- ----------------------------------------
===Assigning Step=== ===Assigning Step===
Coming soon...
---------------------------------------- ----------------------------------------
===Ranking Step=== ===Ranking Step===
Coming soon...
---------------------------------------- ----------------------------------------

135
EXAMPLES.t2t Normal file
View File

@ -0,0 +1,135 @@
CMPH - Examples
%!includeconf: CONFIG.t2t
Using cmph is quite simple. Take a look at the following examples.
-------------------------------------------------------------------
```
#include <cmph.h>
#include <string.h>
// Create minimal perfect hash function from in-memory vector
int main(int argc, char **argv)
{
    // Creating a filled vector
    const char *vector[] = {"aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc", "dddddddddd", "eeeeeeeeee",
        "ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"};
    unsigned int nkeys = 10;
    // Source of keys
    cmph_io_adapter_t *source = cmph_io_vector_adapter((char **)vector, nkeys);
    // Create minimal perfect hash function using the default (chm) algorithm.
    cmph_config_t *config = cmph_config_new(source);
    cmph_t *hash = cmph_new(config);
    // The configuration is not needed once the function has been built
    cmph_config_destroy(config);
    // Stop here if no function was built, instead of searching with a NULL hash
    if (hash == NULL)
    {
        fprintf(stderr, "Unable to create minimal perfect hash function\n");
        cmph_io_vector_adapter_destroy(source);
        return 1;
    }
    // Find key
    const char *key = "jjjjjjjjjj";
    unsigned int id = cmph_search(hash, key, strlen(key));
    fprintf(stderr, "Id:%u\n", id);
    // Destroy hash
    cmph_destroy(hash);
    cmph_io_vector_adapter_destroy(source);
    return 0;
}
```
Download [vector_adapter_ex1.c examples/vector_adapter_ex1.c]. This example does not work in versions below 0.3.
-------------------------------
```
#include <cmph.h>
#include <string.h>
// Create minimal perfect hash function from an in-memory vector of structs, dump it to disk and load it back
// Pack the record with 1-byte alignment so that no padding is inserted between its fields
#pragma pack(1)
typedef struct {
    cmph_uint32 id;   // record identifier
    char key[11];     // the key that gets hashed: 10 characters plus the terminating '\0'
    cmph_uint32 year;
} rec_t;
// Restore the default packing for whatever is declared after the record
#pragma pack()
int main(int argc, char **argv)
{
    // Creating a filled vector
    unsigned int i = 0;
    rec_t vector[10] = {{1, "aaaaaaaaaa", 1999}, {2, "bbbbbbbbbb", 2000}, {3, "cccccccccc", 2001},
        {4, "dddddddddd", 2002}, {5, "eeeeeeeeee", 2003}, {6, "ffffffffff", 2004},
        {7, "gggggggggg", 2005}, {8, "hhhhhhhhhh", 2006}, {9, "iiiiiiiiii", 2007},
        {10,"jjjjjjjjjj", 2008}};
    unsigned int nkeys = 10;
    // The function is dumped with cmph_dump below, so open the file in binary mode
    FILE* mphf_fd = fopen("temp_struct_vector.mph", "wb");
    if (mphf_fd == NULL)
    {
        fprintf(stderr, "Unable to create file \"temp_struct_vector.mph\"\n");
        return 1;
    }
    // Source of keys: the 11-byte key field stored right after the id field of each record
    cmph_io_adapter_t *source = cmph_io_struct_vector_adapter(vector, sizeof(rec_t), sizeof(cmph_uint32), 11, nkeys);
    // Create minimal perfect hash function using the BDZ algorithm.
    cmph_config_t *config = cmph_config_new(source);
    cmph_config_set_algo(config, CMPH_BDZ);
    cmph_config_set_mphf_fd(config, mphf_fd);
    cmph_t *hash = cmph_new(config);
    cmph_config_destroy(config);
    // Stop here if no function was built, instead of dumping a NULL hash
    if (hash == NULL)
    {
        fprintf(stderr, "Unable to create minimal perfect hash function\n");
        cmph_io_vector_adapter_destroy(source);
        fclose(mphf_fd);
        return 1;
    }
    // Write the function to disk and release the in-memory copy
    cmph_dump(hash, mphf_fd);
    cmph_destroy(hash);
    fclose(mphf_fd);
    // Load the function back from disk
    mphf_fd = fopen("temp_struct_vector.mph", "rb");
    if (mphf_fd == NULL)
    {
        fprintf(stderr, "Unable to open file \"temp_struct_vector.mph\"\n");
        cmph_io_vector_adapter_destroy(source);
        return 1;
    }
    hash = cmph_load(mphf_fd);
    if (hash == NULL)
    {
        fprintf(stderr, "Unable to load minimal perfect hash function\n");
        cmph_io_vector_adapter_destroy(source);
        fclose(mphf_fd);
        return 1;
    }
    // Find every key; the length 11 matches the key length given to the adapter
    for (i = 0; i < nkeys; i++) {
        const char *key = vector[i].key;
        unsigned int id = cmph_search(hash, key, 11);
        fprintf(stderr, "key:%s -- hash:%u\n", key, id);
    }
    // Destroy hash
    cmph_destroy(hash);
    // NOTE(review): the source was created by cmph_io_struct_vector_adapter; confirm whether
    // the library provides a matching struct-vector destroy function that should be used here
    cmph_io_vector_adapter_destroy(source);
    fclose(mphf_fd);
    return 0;
}
```
Download [struct_vector_adapter_ex3.c examples/struct_vector_adapter_ex3.c]. This example does not work in versions below 0.7.
-------------------------------
```
#include <cmph.h>
#include <stdio.h>
#include <string.h>
// Create minimal perfect hash function from keys stored on disk using the BMZ algorithm
int main(int argc, char **argv)
{
    // Open file with newline separated list of keys
    FILE * keys_fd = fopen("keys.txt", "r");
    cmph_t *hash = NULL;
    if (keys_fd == NULL)
    {
        fprintf(stderr, "File \"keys.txt\" not found\n");
        return 1;
    }
    // Source of keys
    cmph_io_adapter_t *source = cmph_io_nlfile_adapter(keys_fd);
    // Create minimal perfect hash function using the BMZ algorithm.
    cmph_config_t *config = cmph_config_new(source);
    cmph_config_set_algo(config, CMPH_BMZ);
    hash = cmph_new(config);
    cmph_config_destroy(config);
    // Stop here if no function was built, instead of searching with a NULL hash
    if (hash == NULL)
    {
        fprintf(stderr, "Unable to create minimal perfect hash function\n");
        cmph_io_nlfile_adapter_destroy(source);
        fclose(keys_fd);
        return 1;
    }
    // Find key
    const char *key = "jjjjjjjjjj";
    unsigned int id = cmph_search(hash, key, strlen(key));
    fprintf(stderr, "Id:%u\n", id);
    // Destroy hash
    cmph_destroy(hash);
    cmph_io_nlfile_adapter_destroy(source);
    fclose(keys_fd);
    return 0;
}
```
Download [file_adapter_ex2.c examples/file_adapter_ex2.c] and [keys.txt examples/keys.txt]
%!include: ALGORITHMS.t2t
%!include: FOOTER.t2t

View File

@ -10,7 +10,7 @@ News Log
- [An algorithm to generate MPHFs that require around 2.6 bits per key to be stored bdz.html], which is referred to as BDZ algorithm. The algorithm is the fastest one available in the literature for sets that can be treated in internal memory. - [An algorithm to generate MPHFs that require around 2.6 bits per key to be stored bdz.html], which is referred to as BDZ algorithm. The algorithm is the fastest one available in the literature for sets that can be treated in internal memory.
- [An algorithm to generate PHFs with range m = cn, for c > 1.22 bdz.html], which is referred to as BDZ_PH algorithm. It is actually the BDZ algorithm without the ranking step. The resulting functions can be stored in 1.95 bits per key for //c = 1.23// and are considerably faster than the MPHFs generated by the BDZ algorithm. - [An algorithm to generate PHFs with range m = cn, for c > 1.22 bdz.html], which is referred to as BDZ_PH algorithm. It is actually the BDZ algorithm without the ranking step. The resulting functions can be stored in 1.95 bits per key for //c = 1.23// and are considerably faster than the MPHFs generated by the BDZ algorithm.
- An adapter to support a vector of struct as the source of keys has been added. - An adapter to support a vector of struct as the source of keys has been added.
- An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of the packed function is still faster and can be easily mmapped. - An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of a packed function is still faster and can be easily mmapped.
- The hash functions djb2, fnv and sdbm were removed because they do not use random seeds and therefore are not useful for MPHFs algorithms. - The hash functions djb2, fnv and sdbm were removed because they do not use random seeds and therefore are not useful for MPHFs algorithms.
- All reported bugs and suggestions have been corrected and included as well. - All reported bugs and suggestions have been corrected and included as well.

View File

@ -83,7 +83,7 @@ The CMPH Library encapsulates the newest and more efficient algorithms in an eas
- [An algorithm to generate MPHFs that require around 2.6 bits per key to be stored bdz.html], which is referred to as BDZ algorithm. The algorithm is the fastest one available in the literature for sets that can be treated in internal memory. - [An algorithm to generate MPHFs that require around 2.6 bits per key to be stored bdz.html], which is referred to as BDZ algorithm. The algorithm is the fastest one available in the literature for sets that can be treated in internal memory.
- [An algorithm to generate PHFs with range m = cn, for c > 1.22 bdz.html], which is referred to as BDZ_PH algorithm. It is actually the BDZ algorithm without the ranking step. The resulting functions can be stored in 1.95 bits per key for //c = 1.23// and are considerably faster than the MPHFs generated by the BDZ algorithm. - [An algorithm to generate PHFs with range m = cn, for c > 1.22 bdz.html], which is referred to as BDZ_PH algorithm. It is actually the BDZ algorithm without the ranking step. The resulting functions can be stored in 1.95 bits per key for //c = 1.23// and are considerably faster than the MPHFs generated by the BDZ algorithm.
- An adapter to support a vector of struct as the source of keys has been added. - An adapter to support a vector of struct as the source of keys has been added.
- An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of the packed function is still faster and can be easily mmapped. - An API to support the ability of packing a perfect hash function into a preallocated contiguous memory space. The computation of a packed function is still faster and can be easily mmapped.
- The hash functions djb2, fnv and sdbm were removed because they do not use random seeds and therefore are not useful for MPHFs algorithms. - The hash functions djb2, fnv and sdbm were removed because they do not use random seeds and therefore are not useful for MPHFs algorithms.
- All reported bugs and suggestions have been corrected and included as well. - All reported bugs and suggestions have been corrected and included as well.

View File

@ -11,6 +11,7 @@ txt2tags -t html -i GPERF.t2t -o gperf.html
txt2tags -t html -i FAQ.t2t -o faq.html txt2tags -t html -i FAQ.t2t -o faq.html
txt2tags -t html -i CONCEPTS.t2t -o concepts.html txt2tags -t html -i CONCEPTS.t2t -o concepts.html
txt2tags -t html -i NEWSLOG.t2t -o newslog.html txt2tags -t html -i NEWSLOG.t2t -o newslog.html
txt2tags -t html -i EXAMPLES.t2t -o examples.html
txt2tags -t txt --mask-email -i README.t2t -o README txt2tags -t txt --mask-email -i README.t2t -o README
txt2tags -t txt -i BDZ.t2t -o BDZ txt2tags -t txt -i BDZ.t2t -o BDZ
@ -23,3 +24,4 @@ txt2tags -t txt -i GPERF.t2t -o GPERF
txt2tags -t txt -i FAQ.t2t -o FAQ txt2tags -t txt -i FAQ.t2t -o FAQ
txt2tags -t txt -i CONCEPTS.t2t -o CONCEPTS txt2tags -t txt -i CONCEPTS.t2t -o CONCEPTS
txt2tags -t txt -i NEWSLOG.t2t -o NEWSLOG txt2tags -t txt -i NEWSLOG.t2t -o NEWSLOG
txt2tags -t txt -i EXAMPLES.t2t -o EXAMPLES