Added initial txt2tags documentation.
This commit is contained in:
parent
31e9d838e8
commit
76f7be31e4
34
ChangeLog
34
ChangeLog
@ -1,3 +1,37 @@
|
|||||||
|
2005-01-19 10:40 davi
|
||||||
|
|
||||||
|
* acinclude.m4, configure.ac: Added macros for large file support.
|
||||||
|
|
||||||
|
2005-01-18 19:06 fc_botelho
|
||||||
|
|
||||||
|
* src/: bmz.c, bmz.h, bmz_structs.h, cmph.c, cmph.h,
|
||||||
|
cmph_structs.c, cmph_structs.h, cmph_types.h, czech.c, czech.h,
|
||||||
|
czech_structs.h, djb2_hash.c, djb2_hash.h, fnv_hash.c, fnv_hash.h,
|
||||||
|
graph.c, graph.h, hash.c, hash.h, hash_state.h, jenkins_hash.c,
|
||||||
|
jenkins_hash.h, main.c, sdbm_hash.c, sdbm_hash.h, vqueue.c,
|
||||||
|
vqueue.h, vstack.c, vstack.h: version with cmph prefix
|
||||||
|
|
||||||
|
2005-01-18 15:10 davi
|
||||||
|
|
||||||
|
* ChangeLog, cmph.vcproj, cmphapp.vcproj, wingetopt.c, wingetopt.h:
|
||||||
|
Added missing files.
|
||||||
|
|
||||||
|
2005-01-18 14:25 fc_botelho
|
||||||
|
|
||||||
|
* aclocal.m4: initial version
|
||||||
|
|
||||||
|
2005-01-18 14:16 fc_botelho
|
||||||
|
|
||||||
|
* aclocal.m4: initial version
|
||||||
|
|
||||||
|
2005-01-18 13:58 fc_botelho
|
||||||
|
|
||||||
|
* src/czech.c: using bit mask to represent boolean values
|
||||||
|
|
||||||
|
2005-01-18 13:56 fc_botelho
|
||||||
|
|
||||||
|
* src/czech.c: no message
|
||||||
|
|
||||||
2005-01-18 10:18 davi
|
2005-01-18 10:18 davi
|
||||||
|
|
||||||
* COPYING, INSTALL, src/Makefile.am, src/bmz.c, src/bmz.h,
|
* COPYING, INSTALL, src/Makefile.am, src/bmz.c, src/bmz.h,
|
||||||
|
147
README
147
README
@ -0,0 +1,147 @@
|
|||||||
|
== cmph - C Minimal Perfect Hashing Library ==
|
||||||
|
|
||||||
|
Description
|
||||||
|
|
||||||
|
C Minimal Perfect Hashing Library is a portable LGPLed library to create and
|
||||||
|
work with minimal perfect hashes. The cmph library encapsulates the newest
|
||||||
|
and most efficient algorithms in the literature in an easy-to-use,
|
||||||
|
production-quality, fast API. The library is designed to work with big entries that
|
||||||
|
won't fit in the main memory. It has been used successfully to create hashes
|
||||||
|
bigger than 100 million entries. Although there is a lack of similar libraries
|
||||||
|
in the free software world, we can point out some of the "distinguishing"
|
||||||
|
features of cmph:
|
||||||
|
|
||||||
|
- Fast
|
||||||
|
- Space-efficient with main memory usage carefully documented
|
||||||
|
- The best modern algorithms are available (or at least scheduled for implementation :-))
|
||||||
|
- Object oriented implementation
|
||||||
|
- Works with in-disk key sets through use of adapter pattern
|
||||||
|
- Serialization of hash functions
|
||||||
|
- Easily extensible
|
||||||
|
- Well-encapsulated API aiming at binary compatibility across releases
|
||||||
|
- Free Software
|
||||||
|
|
||||||
|
News for version 0.3
|
||||||
|
|
||||||
|
- New heuristics in bmz algorithm, providing hash creation with only
|
||||||
|
(0.93 * 16 + 4)*n bytes and hash query with (0.93*4)n bytes
|
||||||
|
|
||||||
|
Examples
|
||||||
|
|
||||||
|
Using cmph is quite easy. Take a look.
|
||||||
|
|
||||||
|
|
||||||
|
// Create minimal perfect hash from in-memory vector
|
||||||
|
#include <cmph.h>
|
||||||
|
...
|
||||||
|
|
||||||
|
const char **vector;
|
||||||
|
unsigned int nkeys;
|
||||||
|
//Fill vector
|
||||||
|
//...
|
||||||
|
|
||||||
|
//Create minimal perfect hash
|
||||||
|
cmph_config_t *config = cmph_config_new(cmph_io_vector_adapter(vector, nkeys));
|
||||||
|
cmph_t *hash = cmph_new(config);
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
|
||||||
|
//Find key
|
||||||
|
const char *key = "sample key";
|
||||||
|
unsigned int id = cmph_search(hash, key);
|
||||||
|
|
||||||
|
//Destroy hash
|
||||||
|
cmph_destroy(hash);
|
||||||
|
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
// Create minimal perfect hash from in-disk keys using BMZ algorithm
|
||||||
|
#include <cmph.h>
|
||||||
|
...
|
||||||
|
|
||||||
|
//Open file with newline separated list of keys
|
||||||
|
FILE *fd = fopen("keysfile_newline_separated", "r");
|
||||||
|
//check for errors
|
||||||
|
//...
|
||||||
|
|
||||||
|
cmph_config_t *config = cmph_config_new(cmph_io_nlfile_adapter(fd));
|
||||||
|
cmph_config_set_algo(config, CMPH_BMZ);
|
||||||
|
cmph_t *hash = cmph_new(config);
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
fclose(fd);
|
||||||
|
|
||||||
|
//Find key
|
||||||
|
const char *key = "sample key";
|
||||||
|
unsigned int id = cmph_search(hash, key);
|
||||||
|
|
||||||
|
//Destroy hash
|
||||||
|
cmph_destroy(hash);
|
||||||
|
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
The cmph application
|
||||||
|
|
||||||
|
cmph is the name of both the library and the utility
|
||||||
|
application that comes with this package. You can use the cmph
|
||||||
|
application to create minimal perfect hashes from the command line. The cmph utility
|
||||||
|
comes with a number of flags, but it is very simple to create and query
|
||||||
|
minimal perfect hashes:
|
||||||
|
|
||||||
|
|
||||||
|
$ # Create mph for keys in file keys_file
|
||||||
|
$ ./cmph keys_file
|
||||||
|
$ # Query id of keys in the file keys_query
|
||||||
|
$ ./cmph -m keys_file.mph keys_query
|
||||||
|
|
||||||
|
|
||||||
|
The additional options let you set most of the parameters you have
|
||||||
|
available through the C API. Below you can see the full help message for the
|
||||||
|
utility.
|
||||||
|
|
||||||
|
|
||||||
|
usage: cmph [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile
|
||||||
|
Minimum perfect hashing tool
|
||||||
|
|
||||||
|
-h print this help message
|
||||||
|
-c c value that determines the number of vertices in the graph
|
||||||
|
-a algorithm - valid values are
|
||||||
|
* czech
|
||||||
|
* bmz
|
||||||
|
-f hash function (may be used multiple times) - valid values are
|
||||||
|
* jenkins
|
||||||
|
* djb2
|
||||||
|
* sdbm
|
||||||
|
* fnv
|
||||||
|
* glib
|
||||||
|
* pjw
|
||||||
|
-V print version number and exit
|
||||||
|
-v increase verbosity (may be used multiple times)
|
||||||
|
-k number of keys
|
||||||
|
-g generation mode
|
||||||
|
-s random seed
|
||||||
|
-m minimum perfect hash function file
|
||||||
|
keysfile line separated file with keys
|
||||||
|
|
||||||
|
|
||||||
|
Downloads
|
||||||
|
|
||||||
|
Use the project page at sourceforge: http://sf.net/projects/cmph
|
||||||
|
|
||||||
|
License Stuff
|
||||||
|
|
||||||
|
Code is under the LGPL.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Enjoy!
|
||||||
|
|
||||||
|
Davi de Castro Reis
|
||||||
|
|
||||||
|
Fabiano Cupertino Botelho
|
||||||
|
|
||||||
|
Last Updated: Thu Jan 20 11:01:01 2005
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
149
README.t2t
Normal file
149
README.t2t
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
== cmph - C Minimal Perfect Hashing Library ==
|
||||||
|
|
||||||
|
|
||||||
|
**Description**
|
||||||
|
|
||||||
|
C Minimal Perfect Hashing Library is a portable LGPLed library to create and
|
||||||
|
work with minimal perfect hashes. The cmph library encapsulates the newest
|
||||||
|
and most efficient algorithms in the literature in an easy-to-use,
|
||||||
|
production-quality, fast API. The library is designed to work with big entries that
|
||||||
|
won't fit in the main memory. It has been used successfully to create hashes
|
||||||
|
bigger than 100 million entries. Although there is a lack of similar libraries
|
||||||
|
in the free software world, we can point out some of the "distinguishing"
|
||||||
|
features of cmph:
|
||||||
|
|
||||||
|
- Fast
|
||||||
|
- Space-efficient with main memory usage carefully documented
|
||||||
|
- The best modern algorithms are available (or at least scheduled for implementation :-))
|
||||||
|
- Object oriented implementation
|
||||||
|
- Works with in-disk key sets through use of adapter pattern
|
||||||
|
- Serialization of hash functions
|
||||||
|
- Easily extensible
|
||||||
|
- Well-encapsulated API aiming at binary compatibility across releases
|
||||||
|
- Free Software
|
||||||
|
|
||||||
|
|
||||||
|
**News for version 0.3**
|
||||||
|
|
||||||
|
- New heuristics in bmz algorithm, providing hash creation with only
|
||||||
|
(0.93 * 16 + 4)*n bytes and hash query with (0.93*4)n bytes
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
Using cmph is quite easy. Take a look.
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
// Create minimal perfect hash from in-memory vector
|
||||||
|
#include <cmph.h>
|
||||||
|
...
|
||||||
|
|
||||||
|
const char **vector;
|
||||||
|
unsigned int nkeys;
|
||||||
|
//Fill vector
|
||||||
|
//...
|
||||||
|
|
||||||
|
//Create minimal perfect hash
|
||||||
|
cmph_config_t *config = cmph_config_new(cmph_io_vector_adapter(vector, nkeys));
|
||||||
|
cmph_t *hash = cmph_new(config);
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
|
||||||
|
//Find key
|
||||||
|
const char *key = "sample key";
|
||||||
|
unsigned int id = cmph_search(hash, key);
|
||||||
|
|
||||||
|
//Destroy hash
|
||||||
|
cmph_destroy(hash);
|
||||||
|
```
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
```
|
||||||
|
// Create minimal perfect hash from in-disk keys using BMZ algorithm
|
||||||
|
#include <cmph.h>
|
||||||
|
...
|
||||||
|
|
||||||
|
//Open file with newline separated list of keys
|
||||||
|
FILE *fd = fopen("keysfile_newline_separated", "r");
|
||||||
|
//check for errors
|
||||||
|
//...
|
||||||
|
|
||||||
|
cmph_config_t *config = cmph_config_new(cmph_io_nlfile_adapter(fd));
|
||||||
|
cmph_config_set_algo(config, CMPH_BMZ);
|
||||||
|
cmph_t *hash = cmph_new(config);
|
||||||
|
cmph_config_destroy(config);
|
||||||
|
fclose(fd);
|
||||||
|
|
||||||
|
//Find key
|
||||||
|
const char *key = "sample key";
|
||||||
|
unsigned int id = cmph_search(hash, key);
|
||||||
|
|
||||||
|
//Destroy hash
|
||||||
|
cmph_destroy(hash);
|
||||||
|
```
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
**The cmph application**
|
||||||
|
|
||||||
|
cmph is the name of both the library and the utility
|
||||||
|
application that comes with this package. You can use the cmph
|
||||||
|
application to create minimal perfect hashes from the command line. The cmph utility
|
||||||
|
comes with a number of flags, but it is very simple to create and query
|
||||||
|
minimal perfect hashes:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ # Create mph for keys in file keys_file
|
||||||
|
$ ./cmph keys_file
|
||||||
|
$ # Query id of keys in the file keys_query
|
||||||
|
$ ./cmph -m keys_file.mph keys_query
|
||||||
|
```
|
||||||
|
|
||||||
|
The additional options let you set most of the parameters you have
|
||||||
|
available through the C API. Below you can see the full help message for the
|
||||||
|
utility.
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
usage: cmph [-v] [-h] [-V] [-k] [-g [-s seed] ] [-m file.mph] [-a algorithm] keysfile
|
||||||
|
Minimum perfect hashing tool
|
||||||
|
|
||||||
|
-h print this help message
|
||||||
|
-c c value that determines the number of vertices in the graph
|
||||||
|
-a algorithm - valid values are
|
||||||
|
* czech
|
||||||
|
* bmz
|
||||||
|
-f hash function (may be used multiple times) - valid values are
|
||||||
|
* jenkins
|
||||||
|
* djb2
|
||||||
|
* sdbm
|
||||||
|
* fnv
|
||||||
|
* glib
|
||||||
|
* pjw
|
||||||
|
-V print version number and exit
|
||||||
|
-v increase verbosity (may be used multiple times)
|
||||||
|
-k number of keys
|
||||||
|
-g generation mode
|
||||||
|
-s random seed
|
||||||
|
-m minimum perfect hash function file
|
||||||
|
keysfile line separated file with keys
|
||||||
|
```
|
||||||
|
|
||||||
|
**Downloads**
|
||||||
|
|
||||||
|
Use the project page at sourceforge: http://sf.net/projects/cmph
|
||||||
|
|
||||||
|
**License Stuff**
|
||||||
|
|
||||||
|
Code is under the LGPL.
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Enjoy!
|
||||||
|
|
||||||
|
Davi de Castro Reis
|
||||||
|
|
||||||
|
Fabiano Cupertino Botelho
|
||||||
|
|
||||||
|
|
||||||
|
%preproc(html): '^%html% ' ''
|
||||||
|
%html% <a href="http://sourceforge.net"><img src="http://sourceforge.net/sflogo.php?group_id=96251&type=1" width="88" height="31" border="0" alt="SourceForge.net Logo" /></a>
|
||||||
|
Last Updated: %%date(%c)
|
@ -1,4 +1,4 @@
|
|||||||
#include <getopt.h>
|
#include "../wingetopt.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
@ -8,7 +8,6 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "cmph.h"
|
#include "cmph.h"
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
#include "../wingetopt.h"
|
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#define VERSION "0.2"
|
#define VERSION "0.2"
|
||||||
|
@ -16,8 +16,8 @@ extern "C" {
|
|||||||
int opterr = 1; /* non-zero if a question mark should be returned */
|
int opterr = 1; /* non-zero if a question mark should be returned */
|
||||||
|
|
||||||
int getopt(int argc, char *argv[], char *opstring);
|
int getopt(int argc, char *argv[], char *opstring);
|
||||||
#endif
|
#endif //_GETOPT_
|
||||||
#endif
|
#endif //WIN32
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user