diff --git a/INSTALL b/INSTALL index 095b1eb..b42a17a 100644 --- a/INSTALL +++ b/INSTALL @@ -1,43 +1,27 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004 Free -Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - Basic Installation ================== -These are generic installation instructions. + These are generic installation instructions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. (Caching is -disabled by default to prevent problems with accidental use of stale -cache files.) +you can run in the future to recreate the current configuration, a file +`config.cache' that saves the results of its tests to speed up +reconfiguring, and a file `config.log' containing compiler output +(useful mainly for debugging `configure'). If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. +be considered for the next release. 
If at some point `config.cache' +contains results you don't want to keep, you may remove or edit it. - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You only need -`configure.ac' if you want to change it or regenerate `configure' using -a newer version of `autoconf'. + The file `configure.in' is used to create `configure' by a program +called `autoconf'. You only need `configure.in' if you want to change +it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: @@ -70,22 +54,20 @@ The simplest way to compile this package is: Compilers and Options ===================== -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. You can give `configure' +initial values for variables by setting them in the environment. Using +a Bourne-compatible shell, you can do that on the command line like +this: + CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix - - *Note Defining Variables::, for more details. +Or on systems that have the `env' program, you can do it like this: + env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure Compiling For Multiple Architectures ==================================== -You can compile the package for more than one kind of computer at the + You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. 
To do this, you must use a version of `make' that supports the `VPATH' variable, such as GNU `make'. `cd' to the @@ -93,28 +75,28 @@ directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. - If you have to use a `make' that does not support the `VPATH' -variable, you have to compile the package for one architecture at a -time in the source code directory. After you have installed the -package for one architecture, use `make distclean' before reconfiguring -for another architecture. + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a time +in the source code directory. After you have installed the package for +one architecture, use `make distclean' before reconfiguring for another +architecture. Installation Names ================== -By default, `make install' will install the package's files in + By default, `make install' will install the package's files in `/usr/local/bin', `/usr/local/man', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the -option `--prefix=PREFIX'. +option `--prefix=PATH'. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you -give `configure' the option `--exec-prefix=PREFIX', the package will -use PREFIX as the prefix for installing programs and libraries. +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. Documentation and other data files will still use the regular prefix. In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular +options like `--bindir=PATH' to specify different values for particular kinds of files. 
Run `configure --help' for a list of the directories you can set and what kinds of files go in them. @@ -125,7 +107,7 @@ option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= -Some packages pay attention to `--enable-FEATURE' options to + Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The @@ -140,80 +122,47 @@ you can use the `configure' options `--x-includes=DIR' and Specifying the System Type ========================== -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - + There may be some features `configure' can not figure out +automatically, but needs to determine by the type of host the package +will run on. Usually `configure' can figure that out, but if it prints +a message saying it can not guess the host type, give it the +`--host=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name with three fields: CPU-COMPANY-SYSTEM -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If +See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't -need to know the machine type. +need to know the host type. 
- If you are _building_ compiler tools for cross-compiling, you should + If you are building compiler tools for cross-compiling, you can also use the `--target=TYPE' option to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. +produce code for and the `--build=TYPE' option to select the type of +system on which you are compiling the package. Sharing Defaults ================ -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. -Defining Variables +Operation Controls ================== -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -will cause the specified gcc to be used as the C compiler (unless it is -overridden in the site shell script). - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. 
- -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. + `configure' recognizes the following options to control how it +operates. `--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. + Use and save the results of the tests in FILE instead of + `./config.cache'. Set FILE to `/dev/null' to disable caching, for + debugging `configure'. -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. +`--help' + Print a summary of the options to `configure', and exit. `--quiet' `--silent' @@ -226,6 +175,8 @@ overridden in the site shell script). Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. +`--version' + Print the version of Autoconf used to generate the `configure' + script, and exit. +`configure' also accepts some other, not widely useful, options. diff --git a/Makefile.am b/Makefile.am index c735106..a8d050c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = src tests +SUBDIRS = src tests examples EXTRA_DIST = cmph.spec configure.ac diff --git a/configure.ac b/configure.ac index 9986da0..f3b0bb1 100644 --- a/configure.ac +++ b/configure.ac @@ -35,4 +35,4 @@ dnl Checks for library functions. 
AC_CHECK_SPOON dnl AC_OUTPUT(Makefile tests/Makefile samples/Makefile) -AC_OUTPUT(Makefile src/Makefile tests/Makefile) +AC_OUTPUT(Makefile src/Makefile tests/Makefile examples/Makefile) diff --git a/examples/.deps/file_adapter_ex2.Po b/examples/.deps/file_adapter_ex2.Po new file mode 100644 index 0000000..ab68e18 --- /dev/null +++ b/examples/.deps/file_adapter_ex2.Po @@ -0,0 +1,80 @@ +file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \ + /usr/include/stdlib.h /usr/include/features.h /usr/include/sys/cdefs.h \ + /usr/include/gnu/stubs.h \ + /usr/lib/gcc-lib/i486-linux/3.3.5/include/stddef.h \ + /usr/include/sys/types.h /usr/include/bits/types.h \ + /usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \ + /usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \ + /usr/include/sys/select.h /usr/include/bits/select.h \ + /usr/include/bits/sigset.h /usr/include/bits/time.h \ + /usr/include/sys/sysmacros.h /usr/include/bits/pthreadtypes.h \ + /usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \ + /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \ + /usr/include/bits/wchar.h /usr/include/gconv.h \ + /usr/lib/gcc-lib/i486-linux/3.3.5/include/stdarg.h \ + /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ + /usr/include/bits/stdio.h ../src/cmph_types.h + +../src/cmph.h: + +/usr/include/stdlib.h: + +/usr/include/features.h: + +/usr/include/sys/cdefs.h: + +/usr/include/gnu/stubs.h: + +/usr/lib/gcc-lib/i486-linux/3.3.5/include/stddef.h: + +/usr/include/sys/types.h: + +/usr/include/bits/types.h: + +/usr/include/bits/wordsize.h: + +/usr/include/bits/typesizes.h: + +/usr/include/time.h: + +/usr/include/endian.h: + +/usr/include/bits/endian.h: + +/usr/include/sys/select.h: + +/usr/include/bits/select.h: + +/usr/include/bits/sigset.h: + +/usr/include/bits/time.h: + +/usr/include/sys/sysmacros.h: + +/usr/include/bits/pthreadtypes.h: + +/usr/include/bits/sched.h: + +/usr/include/alloca.h: + 
+/usr/include/stdio.h: + +/usr/include/libio.h: + +/usr/include/_G_config.h: + +/usr/include/wchar.h: + +/usr/include/bits/wchar.h: + +/usr/include/gconv.h: + +/usr/lib/gcc-lib/i486-linux/3.3.5/include/stdarg.h: + +/usr/include/bits/stdio_lim.h: + +/usr/include/bits/sys_errlist.h: + +/usr/include/bits/stdio.h: + +../src/cmph_types.h: diff --git a/examples/.deps/vector_adapter_ex1.Po b/examples/.deps/vector_adapter_ex1.Po new file mode 100644 index 0000000..9a9118b --- /dev/null +++ b/examples/.deps/vector_adapter_ex1.Po @@ -0,0 +1,80 @@ +vector_adapter_ex1.o: vector_adapter_ex1.c ../src/cmph.h \ + /usr/include/stdlib.h /usr/include/features.h /usr/include/sys/cdefs.h \ + /usr/include/gnu/stubs.h \ + /usr/lib/gcc-lib/i486-linux/3.3.5/include/stddef.h \ + /usr/include/sys/types.h /usr/include/bits/types.h \ + /usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \ + /usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \ + /usr/include/sys/select.h /usr/include/bits/select.h \ + /usr/include/bits/sigset.h /usr/include/bits/time.h \ + /usr/include/sys/sysmacros.h /usr/include/bits/pthreadtypes.h \ + /usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \ + /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \ + /usr/include/bits/wchar.h /usr/include/gconv.h \ + /usr/lib/gcc-lib/i486-linux/3.3.5/include/stdarg.h \ + /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ + /usr/include/bits/stdio.h ../src/cmph_types.h + +../src/cmph.h: + +/usr/include/stdlib.h: + +/usr/include/features.h: + +/usr/include/sys/cdefs.h: + +/usr/include/gnu/stubs.h: + +/usr/lib/gcc-lib/i486-linux/3.3.5/include/stddef.h: + +/usr/include/sys/types.h: + +/usr/include/bits/types.h: + +/usr/include/bits/wordsize.h: + +/usr/include/bits/typesizes.h: + +/usr/include/time.h: + +/usr/include/endian.h: + +/usr/include/bits/endian.h: + +/usr/include/sys/select.h: + +/usr/include/bits/select.h: + +/usr/include/bits/sigset.h: + 
+/usr/include/bits/time.h: + +/usr/include/sys/sysmacros.h: + +/usr/include/bits/pthreadtypes.h: + +/usr/include/bits/sched.h: + +/usr/include/alloca.h: + +/usr/include/stdio.h: + +/usr/include/libio.h: + +/usr/include/_G_config.h: + +/usr/include/wchar.h: + +/usr/include/bits/wchar.h: + +/usr/include/gconv.h: + +/usr/lib/gcc-lib/i486-linux/3.3.5/include/stdarg.h: + +/usr/include/bits/stdio_lim.h: + +/usr/include/bits/sys_errlist.h: + +/usr/include/bits/stdio.h: + +../src/cmph_types.h: diff --git a/examples/Makefile.am b/examples/Makefile.am new file mode 100755 index 0000000..812919f --- /dev/null +++ b/examples/Makefile.am @@ -0,0 +1,10 @@ +noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2 + +INCLUDES = -I../src/ + +vector_adapter_ex1_LDADD = ../src/libcmph.la +vector_adapter_ex1_SOURCES = vector_adapter_ex1.c + +file_adapter_ex2_LDADD = ../src/libcmph.la +file_adapter_ex2_SOURCES = file_adapter_ex2.c + diff --git a/examples/file_adapter_ex2.c b/examples/file_adapter_ex2.c new file mode 100644 index 0000000..57ec5b6 --- /dev/null +++ b/examples/file_adapter_ex2.c @@ -0,0 +1,32 @@ +#include <cmph.h> +#include <string.h> + + // Create minimal perfect hash function from in-disk keys using BMZ algorithm +int main(int argc, char **argv) +{ + //Open file with newline separated list of keys + FILE * keys_fd = fopen("keys.txt", "r"); + cmph_t *hash = NULL; + if (keys_fd == NULL) + { + fprintf(stderr, "File \"keys.txt\" not found\n"); + exit(1); + } + // Source of keys + cmph_io_adapter_t *source = cmph_io_nlfile_adapter(keys_fd); + + cmph_config_t *config = cmph_config_new(source); + cmph_config_set_algo(config, CMPH_BMZ); + hash = cmph_new(config); + cmph_config_destroy(config); + + //Find key + const char *key = "jjjjjjjjjj"; + unsigned int id = cmph_search(hash, key, strlen(key)); + fprintf(stderr, "Id:%u\n", id); + //Destroy hash + cmph_destroy(hash); + free(source); + fclose(keys_fd); + return 0; +} diff --git a/examples/keys.txt b/examples/keys.txt new file mode 100644 index 
0000000..e1edd7c --- /dev/null +++ b/examples/keys.txt @@ -0,0 +1,10 @@ +aaaaaaaaaa +bbbbbbbbbb +cccccccccc +dddddddddd +eeeeeeeeee +ffffffffff +gggggggggg +hhhhhhhhhh +iiiiiiiiii +jjjjjjjjjj diff --git a/examples/vector_adapter_ex1.c b/examples/vector_adapter_ex1.c new file mode 100755 index 0000000..1ae8abb --- /dev/null +++ b/examples/vector_adapter_ex1.c @@ -0,0 +1,26 @@ +#include <cmph.h> + +// Create minimal perfect hash function from in-memory vector +int main(int argc, char **argv) +{ + // Creating a filled vector + const char *vector[] = {"aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc", "dddddddddd", "eeeeeeeeee", + "ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"}; + unsigned int nkeys = 10; + // Source of keys + cmph_io_adapter_t *source = cmph_io_vector_adapter(vector, nkeys); + + //Create minimal perfect hash function using the default (chm) algorithm. + cmph_config_t *config = cmph_config_new(source); + cmph_t *hash = cmph_new(config); + cmph_config_destroy(config); + + //Find key + const char *key = "jjjjjjjjjj"; + unsigned int id = cmph_search(hash, key, strlen(key)); + fprintf(stderr, "Id:%u\n", id); + //Destroy hash + cmph_destroy(hash); + free(source); + return 0; +} diff --git a/src/cmph.c b/src/cmph.c index 490db0e..18ac502 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -12,6 +12,8 @@ const char *cmph_names[] = { "bmz", "chm", NULL }; /* included -- Fabiano */ +static cmph_uint32 position; // access position when data is a vector + static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen) { FILE *fd = (FILE *)data; @@ -37,16 +39,34 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen) return *keylen; } +static int key_vector_read(void *data, char **key, cmph_uint32 *keylen) +{ + char **keys = (char **)data; + if (keys[position] == NULL) return -1; /* test the element itself; the slot address is never NULL */ + *keylen = strlen(*(keys + position)); + *key = (char *)malloc(*keylen + 1); /* +1: strcpy also writes the terminating NUL */ + strcpy(*key, *(keys + position)); + position ++; + return *keylen; +} + + static void 
key_nlfile_dispose(void *data, char *key, cmph_uint32 keylen) { free(key); } + static void key_nlfile_rewind(void *data) { FILE *fd = (FILE *)data; rewind(fd); } +static void key_vector_rewind(void *data) +{ + position = 0; +} + static cmph_uint32 count_nlfile_keys(FILE *fd) { cmph_uint32 count = 0; @@ -89,7 +109,14 @@ cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys) cmph_io_adapter_t *cmph_io_vector_adapter(const char ** vector, cmph_uint32 nkeys) { - return NULL; + cmph_io_adapter_t * key_source = malloc(sizeof(cmph_io_adapter_t)); + assert(key_source); + key_source->data = (void *)vector; + key_source->nkeys = nkeys; + key_source->read = key_vector_read; + key_source->dispose = key_nlfile_dispose; + key_source->rewind = key_vector_rewind; + return key_source; } cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source) diff --git a/src/cmph.h b/src/cmph.h index f457736..a7de997 100644 --- a/src/cmph.h +++ b/src/cmph.h @@ -27,7 +27,7 @@ typedef struct /* please call free() in the created adapters */ cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd); cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys); -/*cmph_io_adapter_t *cmph_io_vector_adapter(const char ** vector, cmph_uint32 nkeys);*/ +cmph_io_adapter_t *cmph_io_vector_adapter(const char ** vector, cmph_uint32 nkeys); /** Hash configuration API **/ cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);