diff --git a/INSTALL b/INSTALL index 54caf7c..095b1eb 100644 --- a/INSTALL +++ b/INSTALL @@ -1,13 +1,16 @@ -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software -Foundation, Inc. +Installation Instructions +************************* - This file is free documentation; the Free Software Foundation gives +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004 Free +Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. Basic Installation ================== - These are generic installation instructions. +These are generic installation instructions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses @@ -67,9 +70,9 @@ The simplest way to compile this package is: Compilers and Options ===================== - Some systems require unusual options for compilation or linking that -the `configure' script does not know about. Run `./configure --help' -for details on some of the pertinent environment variables. +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. Run `./configure --help' for +details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here @@ -82,7 +85,7 @@ is an example: Compiling For Multiple Architectures ==================================== - You can compile the package for more than one kind of computer at the +You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you must use a version of `make' that supports the `VPATH' variable, such as GNU `make'. `cd' to the @@ -99,19 +102,19 @@ for another architecture. Installation Names ================== - By default, `make install' will install the package's files in +By default, `make install' will install the package's files in `/usr/local/bin', `/usr/local/man', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the -option `--prefix=PATH'. +option `--prefix=PREFIX'. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you -give `configure' the option `--exec-prefix=PATH', the package will use -PATH as the prefix for installing programs and libraries. +give `configure' the option `--exec-prefix=PREFIX', the package will +use PREFIX as the prefix for installing programs and libraries. Documentation and other data files will still use the regular prefix. In addition, if you use an unusual directory layout you can give -options like `--bindir=PATH' to specify different values for particular +options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. @@ -122,7 +125,7 @@ option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= - Some packages pay attention to `--enable-FEATURE' options to +Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The @@ -137,11 +140,11 @@ you can use the `configure' options `--x-includes=DIR' and Specifying the System Type ========================== - There may be some features `configure' cannot figure out -automatically, but needs to determine by the type of machine the package -will run on. Usually, assuming the package is built to be run on the -_same_ architectures, `configure' can figure that out, but if it prints -a message saying it cannot guess the machine type, give it the +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: @@ -167,9 +170,9 @@ eventually be run) with `--host=TYPE'. Sharing Defaults ================ - If you want to set default values for `configure' scripts to share, -you can create a site shell script called `config.site' that gives -default values for variables like `CC', `cache_file', and `prefix'. +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. @@ -178,7 +181,7 @@ A warning: not all `configure' scripts look for a site script. Defining Variables ================== - Variables not defined in a site shell script can be set in the +Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set @@ -192,8 +195,7 @@ overridden in the site shell script). `configure' Invocation ====================== - `configure' recognizes the following options to control how it -operates. +`configure' recognizes the following options to control how it operates. `--help' `-h' diff --git a/examples/.deps/file_adapter_ex2.Po b/examples/.deps/file_adapter_ex2.Po index 5aa5200..00af2d2 100644 --- a/examples/.deps/file_adapter_ex2.Po +++ b/examples/.deps/file_adapter_ex2.Po @@ -1,7 +1,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \ /usr/include/stdlib.h /usr/include/features.h /usr/include/sys/cdefs.h \ /usr/include/gnu/stubs.h \ - /usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h \ + /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h \ /usr/include/sys/types.h /usr/include/bits/types.h \ /usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \ /usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \ @@ -11,7 +11,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \ /usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \ /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \ /usr/include/bits/wchar.h /usr/include/gconv.h \ - /usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h \ + /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h \ /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ /usr/include/bits/stdio.h ../src/cmph_types.h @@ -25,7 +25,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \ /usr/include/gnu/stubs.h: -/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h: +/usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h: /usr/include/sys/types.h: @@ -69,7 +69,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \ /usr/include/gconv.h: -/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h: +/usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h: /usr/include/bits/stdio_lim.h: diff --git a/examples/.deps/vector_adapter_ex1.Po b/examples/.deps/vector_adapter_ex1.Po index 73271e2..2c757d1 100644 --- a/examples/.deps/vector_adapter_ex1.Po +++ b/examples/.deps/vector_adapter_ex1.Po @@ -1,7 +1,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \ ../src/cmph.h /usr/include/stdlib.h /usr/include/features.h \ /usr/include/sys/cdefs.h /usr/include/gnu/stubs.h \ - /usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h \ + /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h \ /usr/include/sys/types.h /usr/include/bits/types.h \ /usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \ /usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \ @@ -11,7 +11,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \ /usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \ /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \ /usr/include/bits/wchar.h /usr/include/gconv.h \ - /usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h \ + /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h \ /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ /usr/include/bits/stdio.h ../src/cmph_types.h @@ -25,7 +25,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \ /usr/include/gnu/stubs.h: -/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h: +/usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h: /usr/include/sys/types.h: @@ -69,7 +69,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \ /usr/include/gconv.h: -/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h: +/usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h: /usr/include/bits/stdio_lim.h: diff --git a/examples/Makefile b/examples/Makefile index 04bee77..2f4b1f5 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.9.1 from Makefile.am. +# Makefile.in generated by automake 1.9.2 from Makefile.am. # examples/Makefile. Generated from Makefile.in by configure. # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, @@ -36,8 +36,8 @@ POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : -build_triplet = i686-suse-linux -host_triplet = i686-suse-linux +build_triplet = i686-pc-linux-gnu +host_triplet = i686-pc-linux-gnu noinst_PROGRAMS = vector_adapter_ex1$(EXEEXT) \ file_adapter_ex2$(EXEEXT) subdir = examples @@ -74,14 +74,14 @@ DIST_SOURCES = $(file_adapter_ex2_SOURCES) \ ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run aclocal-1.9 +ACLOCAL = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run aclocal-1.9 AMDEP_FALSE = # AMDEP_TRUE = -AMTAR = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run tar +AMTAR = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run tar AR = ar -AUTOCONF = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run autoconf -AUTOHEADER = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run autoheader -AUTOMAKE = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run automake-1.9 +AUTOCONF = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run autoconf +AUTOHEADER = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run autoheader +AUTOMAKE = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run automake-1.9 AWK = gawk CC = gcc CCDEPMODE = depmode=gcc3 @@ -89,9 +89,9 @@ CFLAGS = -g -O2 -D_FILE_OFFSET_BITS=64 CPP = gcc -E CPPFLAGS = CXX = g++ -CXXCPP = /lib/cpp -CXXDEPMODE = depmode=none -CXXFLAGS = +CXXCPP = g++ -E +CXXDEPMODE = depmode=gcc3 +CXXFLAGS = -g -O2 CYGPATH_W = echo DEFS = -DHAVE_CONFIG_H DEPDIR = .deps @@ -101,8 +101,8 @@ ECHO_N = -n ECHO_T = EGREP = grep -E EXEEXT = -F77 = -FFLAGS = +F77 = g77 +FFLAGS = -g -O2 GETCONF = getconf INSTALL_DATA = ${INSTALL} -m 644 INSTALL_PROGRAM = ${INSTALL} @@ -114,7 +114,7 @@ LIBS = LIBTOOL = $(SHELL) $(top_builddir)/libtool LN_S = ln -s LTLIBOBJS = -MAKEINFO = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run makeinfo +MAKEINFO = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run makeinfo OBJEXT = o PACKAGE = cmph PACKAGE_BUGREPORT = @@ -131,35 +131,35 @@ VERSION = 0.3 ac_ct_AR = ar ac_ct_CC = gcc ac_ct_CXX = g++ -ac_ct_F77 = +ac_ct_F77 = g77 ac_ct_GETCONF = getconf ac_ct_RANLIB = ranlib ac_ct_STRIP = strip am__fastdepCC_FALSE = # am__fastdepCC_TRUE = -am__fastdepCXX_FALSE = -am__fastdepCXX_TRUE = # +am__fastdepCXX_FALSE = # +am__fastdepCXX_TRUE = am__include = include am__leading_dot = . am__quote = am__tar = ${AMTAR} chof - "$$tardir" am__untar = ${AMTAR} xf - bindir = ${exec_prefix}/bin -build = i686-suse-linux +build = i686-pc-linux-gnu build_alias = build_cpu = i686 -build_os = linux -build_vendor = suse +build_os = linux-gnu +build_vendor = pc datadir = ${prefix}/share exec_prefix = ${prefix} -host = i686-suse-linux +host = i686-pc-linux-gnu host_alias = host_cpu = i686 -host_os = linux -host_vendor = suse +host_os = linux-gnu +host_vendor = pc includedir = ${prefix}/include infodir = ${prefix}/info -install_sh = /home/fbotelho/doutorado/algoritmos/cmph/cmph/install-sh +install_sh = /home/fbotelho/doutorado/algoritmos/cmph/install-sh libdir = ${exec_prefix}/lib libexecdir = ${exec_prefix}/libexec localstatedir = ${prefix}/var diff --git a/examples/Makefile.in b/examples/Makefile.in index 2cf84b1..639e5c6 100755 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.9.1 from Makefile.am. +# Makefile.in generated by automake 1.9.2 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, diff --git a/src/bmz.c b/src/bmz.c index 7a3d069..f3efc96 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -15,13 +15,6 @@ //#define DEBUG #include "debug.h" -//static cmph_uint32 UNDEFINED = UINT_MAX; - -/* static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */ -/* #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) */ -/* #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) */ -/* #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) */ - static int bmz_gen_edges(cmph_config_t *mph); static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited); static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited); @@ -535,7 +528,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); if (h1 == h2 && ++h2 > bmz->n) h2 = 0; DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m); - return (bmz->g[h1] + bmz->g[h2]); + return bmz->g[h1] + bmz->g[h2]; } void bmz_destroy(cmph_t *mphf) { diff --git a/src/bmz8.c b/src/bmz8.c index 65de52f..5b5c275 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -87,7 +87,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c) { // Mapping step cmph_uint8 biggest_g_value = 0; - cmph_uint8 biggest_edge_value = 1; + cmph_uint8 biggest_edge_value = 1; iterations = 100; if (mph->verbosity) { @@ -100,7 +100,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c) bmz8->hashes[0] = hash_state_new(bmz8->hashfuncs[0], bmz8->n); DEBUGP("hash function 2\n"); bmz8->hashes[1] = hash_state_new(bmz8->hashfuncs[1], bmz8->n); - DEBUGP("Generating edges\n"); + DEBUGP("Generating edges\n"); ok = bmz8_gen_edges(mph); if (!ok) { @@ -120,7 +120,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c) } if (iterations == 0) { - graph_destroy(bmz8->graph); + graph_destroy(bmz8->graph); return NULL; } @@ -375,9 +375,10 @@ static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmp while((neighbor = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR) { if(GETBIT(visited,neighbor)) continue; - DEBUGP("Visiting neighbor %u\n", neighbor); + //DEBUGP("Visiting neighbor %u\n", neighbor); *unused_edge_index = next_unused_edge(bmz8, used_edges, *unused_edge_index); bmz8->g[neighbor] = *unused_edge_index - bmz8->g[v]; + //if (bmz8->g[neighbor] >= bmz8->m) bmz8->g[neighbor] += bmz8->m; SETBIT(visited, neighbor); (*unused_edge_index)++; bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited); @@ -437,7 +438,7 @@ static int bmz8_gen_edges(cmph_config_t *mph) mph->key_source->dispose(mph->key_source->data, key, keylen); return 0; } - DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key); + //DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key); mph->key_source->dispose(mph->key_source->data, key, keylen); // fprintf(stderr, "key = %s -- dispose BMZ\n", key); multiple_edges = graph_contains_edge(bmz8->graph, h1, h2); diff --git a/src/brz.c b/src/brz.c index 3e15711..195195d 100755 --- a/src/brz.c +++ b/src/brz.c @@ -1,7 +1,7 @@ #include "graph.h" -#include "bmz.h" -#include "bmz_structs.h" +#include "bmz8.h" +#include "bmz8_structs.h" #include "brz.h" #include "cmph_structs.h" #include "brz_structs.h" @@ -22,7 +22,7 @@ static int brz_gen_graphs(cmph_config_t *mph); static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n); static char * brz_read_key(FILE * fd); static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); -static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); +static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); brz_config_data_t *brz_config_new() { @@ -37,6 +37,7 @@ brz_config_data_t *brz_config_new() brz->h1 = NULL; brz->h2 = NULL; brz->h3 = NULL; + brz->memory_availability = 1024*1024; brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8)); strcpy(brz->tmp_dir, "/var/tmp/\0"); assert(brz); @@ -64,6 +65,12 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) } } +void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability) +{ + brz_config_data_t *brz = (brz_config_data_t *)mph->data; + brz->memory_availability = memory_availability*1024*1024; +} + void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) { brz_config_data_t *brz = (brz_config_data_t *)mph->data; @@ -84,73 +91,6 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) } } -// static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source) -// { -// cmph_uint8 * hashtable = NULL; -// cmph_uint32 i; -// hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8)); -// source->rewind(source->data); -// memset(hashtable, 0, source->nkeys); -// //check all keys -// for (i = 0; i < source->nkeys; ++i) -// { -// cmph_uint32 h; -// char *buf; -// cmph_uint32 buflen = 0; -// source->read(source->data, &buf, &buflen); -// h = cmph_search(mphf, buf, buflen); -// if(hashtable[h]) -// { -// fprintf(stderr, "collision: %u\n",h); -// return 0; -// } -// //assert(hashtable[h]==0); -// hashtable[h] = 1; -// source->dispose(source->data, buf, buflen); -// } -// fprintf(stderr, "\n===============================================================================\n"); -// free(hashtable); -// return 1; -// } -// -// static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source) -// { -// cmph_uint8 * hashtable = NULL; -// cmph_uint32 i; -// hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8)); -// source->rewind(source->data); -// //memset(hashtable, 0, source->nkeys); -// //check all keys -// for (i = 0; i < source->nkeys; ++i) -// { -// cmph_uint32 h1_v; -// cmph_uint32 h2_v; -// cmph_uint32 h; -// char *buf; -// cmph_uint32 buflen = 0; -// source->read(source->data, &buf, &buflen); -// -// h1_v = hash(h1, buf, buflen) % n; -// -// h2_v = hash(h2, buf, buflen) % n; -// -// if (h1_v == h2_v && ++h2_v >= n) h2_v = 0; -// -// h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys; -// -// if(hashtable[h]) -// { -// fprintf(stderr, "collision: %u\n",h); -// return 0; -// } -// //assert(hashtable[h]==0); -// hashtable[h] = 1; -// source->dispose(source->data, buf, buflen); -// -// } -// free(hashtable); -// return 1; -// } cmph_t *brz_new(cmph_config_t *mph, float c) { @@ -244,13 +184,12 @@ cmph_t *brz_new(cmph_config_t *mph, float c) static int brz_gen_graphs(cmph_config_t *mph) { -#pragma pack(1) cmph_uint32 i, e; brz_config_data_t *brz = (brz_config_data_t *)mph->data; - cmph_uint32 memory_availability = 209715200;//200MB //104857600;//100MB //524288000; // 500MB //209715200; // 200 MB + //cmph_uint32 memory_availability = 200*1024*1024; cmph_uint32 memory_usage = 0; cmph_uint32 nkeys_in_buffer = 0; - cmph_uint8 *buffer = (cmph_uint8 *)malloc(memory_availability); + cmph_uint8 *buffer = (cmph_uint8 *)malloc(brz->memory_availability); cmph_uint32 *buckets_size = (cmph_uint32 *)calloc(brz->k, sizeof(cmph_uint32)); cmph_uint32 *keys_index = NULL; cmph_uint8 **buffer_merge = NULL; @@ -276,7 +215,7 @@ static int brz_gen_graphs(cmph_config_t *mph) mph->key_source->read(mph->key_source->data, &key, &keylen); /* Buffers management */ - if (memory_usage + keylen + 1 > memory_availability) // flush buffers + if (memory_usage + keylen + 1 > brz->memory_availability) // flush buffers { if(mph->verbosity) { @@ -305,7 +244,6 @@ static int brz_gen_graphs(cmph_config_t *mph) buckets_size[h3]++; memory_usage = memory_usage + keylen1 + 1; } -// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); tmp_fd = fopen(filename, "wb"); @@ -323,11 +261,10 @@ static int brz_gen_graphs(cmph_config_t *mph) free(keys_index); fclose(tmp_fd); } - //fprintf(stderr, "Storing read Key\n"); memcpy(buffer + memory_usage, key, keylen + 1); memory_usage = memory_usage + keylen + 1; h3 = hash(brz->h3, key, keylen) % brz->k; - if (brz->size[h3] == MAX_BUCKET_SIZE) + if ((brz->size[h3] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h3]) < brz->size[h3])) { free(buffer); free(buckets_size); @@ -367,8 +304,6 @@ static int brz_gen_graphs(cmph_config_t *mph) buckets_size[h3]++; memory_usage = memory_usage + keylen1 + 1; } -// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes); -// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); tmp_fd = fopen(filename, "wb"); @@ -401,8 +336,6 @@ static int brz_gen_graphs(cmph_config_t *mph) for(i = 0; i < nflushes; i++) { -// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i); -// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); sprintf(filename, "%s%u.cmph",brz->tmp_dir, i); tmp_fds[i] = fopen(filename, "rb"); @@ -420,7 +353,6 @@ static int brz_gen_graphs(cmph_config_t *mph) e = 0; keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *)); nkeys_vd = 0; - //buffer = (cmph_uint8 *)malloc(memory_availability); while(e < brz->m) { i = brz_min_index(buffer_h3, nflushes); @@ -436,56 +368,43 @@ static int brz_gen_graphs(cmph_config_t *mph) if (h3 != buffer_h3[i]) break; keys_vd[nkeys_vd++] = key; - - //save_in_disk(buffer, key, keylen, &memory_usage, memory_availability, graphs_fd); - //fwrite(key, 1, keylen + 1, graphs_fd); e++; - //free(key); key = brz_read_key(tmp_fds[i]); } if (key) { - //save_in_disk(buffer, buffer_merge[i], strlen(buffer_merge[i]), &memory_usage, memory_availability, graphs_fd); assert(nkeys_vd < brz->size[cur_bucket]); keys_vd[nkeys_vd++] = buffer_merge[i]; - //fwrite(buffer_merge[i], 1, strlen(buffer_merge[i]) + 1, graphs_fd); e++; buffer_h3[i] = h3; - //free(buffer_merge[i]); buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8)); memcpy(buffer_merge[i], key, keylen + 1); free(key); } } -/* fprintf(stderr, "BOSTA %u %u e: %u\n", i, buffer_h3[i], e);*/ if(!key) { assert(nkeys_vd < brz->size[cur_bucket]); keys_vd[nkeys_vd++] = buffer_merge[i]; - //save_in_disk(buffer, buffer_merge[i], strlen(buffer_merge[i]), &memory_usage, memory_availability, graphs_fd); - //fwrite(buffer_merge[i], 1, strlen(buffer_merge[i]) + 1, graphs_fd); e++; buffer_h3[i] = UINT_MAX; - //free(buffer_merge[i]); buffer_merge[i] = NULL; } - if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf. + if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket. { cmph_io_adapter_t *source = NULL; cmph_config_t *config = NULL; cmph_t *mphf_tmp = NULL; - bmz_data_t * bmzf = NULL; + bmz8_data_t * bmzf = NULL; // Source of keys - //fprintf(stderr, "Generating mphf %u in %u \n",cur_bucket + 1, brz->k); if(nkeys_vd > max_size) max_size = nkeys_vd; source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd); config = cmph_config_new(source); - cmph_config_set_algo(config, CMPH_BMZ); + cmph_config_set_algo(config, CMPH_BMZ8); cmph_config_set_graphsize(config, brz->c); mphf_tmp = cmph_new(config); - bmzf = (bmz_data_t *)mphf_tmp->data; - //assert(brz_verify_mphf(mphf_tmp, source)); + bmzf = (bmz8_data_t *)mphf_tmp->data; brz_copy_partial_mphf(brz, bmzf, cur_bucket, source); cmph_config_destroy(config); brz_destroy_keys_vd(keys_vd, nkeys_vd); @@ -495,14 +414,12 @@ static int brz_gen_graphs(cmph_config_t *mph) } } for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]); - //flush_buffer(buffer, &memory_usage, graphs_fd); free(tmp_fds); free(keys_vd); free(buffer_merge); free(buffer_h3); fprintf(stderr, "Maximal Size: %u\n", max_size); return 1; -#pragma pack() } static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n) @@ -541,7 +458,7 @@ static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys) for(i = 0; i < nkeys; i++) free(keys_vd[i]); } -static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source) +static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source) { cmph_uint32 i; cmph_uint32 n = ceil(brz->c * brz->size[index]); @@ -549,13 +466,11 @@ static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmp brz->g[index] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8)); for(i = 0; i < n; i++) { - brz->g[index][i] = (cmph_uint8) bmzf->g[i]; + brz->g[index][i] = bmzf->g[i]; //fprintf(stderr, "gsrc[%u]: %u gdest: %u\n", i, (cmph_uint8) bmzf->g[i], brz->g[index][i]); } brz->h1[index] = hash_state_copy(bmzf->hashes[0]); brz->h2[index] = hash_state_copy(bmzf->hashes[1]); - //brz->size[index] = bmzf->n; - //assert(brz_verify_mphf1(brz->h1[index], brz->h2[index], brz->g[index], n, source)); } int brz_dump(cmph_t *mphf, FILE *fd) @@ -675,11 +590,13 @@ cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) cmph_uint32 n = ceil(brz->c * m); cmph_uint32 h1 = hash(brz->h1[h3], key, keylen) % n; cmph_uint32 h2 = hash(brz->h2[h3], key, keylen) % n; + cmph_uint8 mphf_bucket; if (h1 == h2 && ++h2 >= n) h2 = 0; + mphf_bucket = brz->g[h3][h1] + brz->g[h3][h2]; DEBUGP("key: %s h1: %u h2: %u h3: %u\n", key, h1, h2, h3); DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h3]: %u edges: %u\n", key, brz->g[h3][h1], brz->g[h3][h2], brz->offset[h3], brz->m); - DEBUGP("Address: %u\n", (((cmph_uint32)brz->g[h3][h1] + (cmph_uint32)brz->g[h3][h2])% m + brz->offset[h3])); - return (((cmph_uint32)brz->g[h3][h1] + (cmph_uint32)brz->g[h3][h2])% m + brz->offset[h3]); + DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h3]); + return (mphf_bucket + brz->offset[h3]); } void brz_destroy(cmph_t *mphf) { diff --git a/src/brz_structs.h b/src/brz_structs.h index f6af310..adee286 100755 --- a/src/brz_structs.h +++ b/src/brz_structs.h @@ -28,6 +28,7 @@ struct __brz_config_data_t hash_state_t **h1; hash_state_t **h2; hash_state_t * h3; + cmph_uint32 memory_availability; cmph_uint8 * tmp_dir; // temporary directory }; diff --git a/src/cmph.c b/src/cmph.c index 8f86f4e..30bee18 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -173,10 +173,11 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) mph->data = bmz_config_new(); break; case CMPH_BMZ8: - mph->data = (void*)bmz8_config_new(); + mph->data = bmz8_config_new(); break; case CMPH_BRZ: mph->data = brz_config_new(); + break; default: assert(0); } @@ -219,6 +220,7 @@ void cmph_config_destroy(cmph_config_t *mph) break; case CMPH_BRZ: /* included -- Fabiano */ brz_config_destroy(mph); + break; default: assert(0); }