1
Fork 0

Stable version of BRZ algorithm

This commit is contained in:
fc_botelho 2005-09-06 14:11:37 +00:00
parent 3181ab91f2
commit d2aeaae27c
10 changed files with 95 additions and 179 deletions

40
INSTALL
View File

@ -1,5 +1,8 @@
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software Installation Instructions
Foundation, Inc. *************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004 Free
Software Foundation, Inc.
This file is free documentation; the Free Software Foundation gives This file is free documentation; the Free Software Foundation gives
unlimited permission to copy, distribute and modify it. unlimited permission to copy, distribute and modify it.
@ -67,9 +70,9 @@ The simplest way to compile this package is:
Compilers and Options Compilers and Options
===================== =====================
Some systems require unusual options for compilation or linking that Some systems require unusual options for compilation or linking that the
the `configure' script does not know about. Run `./configure --help' `configure' script does not know about. Run `./configure --help' for
for details on some of the pertinent environment variables. details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here by setting variables in the command line or in the environment. Here
@ -102,16 +105,16 @@ Installation Names
By default, `make install' will install the package's files in By default, `make install' will install the package's files in
`/usr/local/bin', `/usr/local/man', etc. You can specify an `/usr/local/bin', `/usr/local/man', etc. You can specify an
installation prefix other than `/usr/local' by giving `configure' the installation prefix other than `/usr/local' by giving `configure' the
option `--prefix=PATH'. option `--prefix=PREFIX'.
You can specify separate installation prefixes for You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you architecture-specific files and architecture-independent files. If you
give `configure' the option `--exec-prefix=PATH', the package will use give `configure' the option `--exec-prefix=PREFIX', the package will
PATH as the prefix for installing programs and libraries. use PREFIX as the prefix for installing programs and libraries.
Documentation and other data files will still use the regular prefix. Documentation and other data files will still use the regular prefix.
In addition, if you use an unusual directory layout you can give In addition, if you use an unusual directory layout you can give
options like `--bindir=PATH' to specify different values for particular options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them. you can set and what kinds of files go in them.
@ -137,11 +140,11 @@ you can use the `configure' options `--x-includes=DIR' and
Specifying the System Type Specifying the System Type
========================== ==========================
There may be some features `configure' cannot figure out There may be some features `configure' cannot figure out automatically,
automatically, but needs to determine by the type of machine the package but needs to determine by the type of machine the package will run on.
will run on. Usually, assuming the package is built to be run on the Usually, assuming the package is built to be run on the _same_
_same_ architectures, `configure' can figure that out, but if it prints architectures, `configure' can figure that out, but if it prints a
a message saying it cannot guess the machine type, give it the message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system `--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form: type, such as `sun4', or a canonical name which has the form:
@ -167,9 +170,9 @@ eventually be run) with `--host=TYPE'.
Sharing Defaults Sharing Defaults
================ ================
If you want to set default values for `configure' scripts to share, If you want to set default values for `configure' scripts to share, you
you can create a site shell script called `config.site' that gives can create a site shell script called `config.site' that gives default
default values for variables like `CC', `cache_file', and `prefix'. values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then `configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the `PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script. `CONFIG_SITE' environment variable to the location of the site script.
@ -192,8 +195,7 @@ overridden in the site shell script).
`configure' Invocation `configure' Invocation
====================== ======================
`configure' recognizes the following options to control how it `configure' recognizes the following options to control how it operates.
operates.
`--help' `--help'
`-h' `-h'

View File

@ -1,7 +1,7 @@
file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \
/usr/include/stdlib.h /usr/include/features.h /usr/include/sys/cdefs.h \ /usr/include/stdlib.h /usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/gnu/stubs.h \ /usr/include/gnu/stubs.h \
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h \ /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h \
/usr/include/sys/types.h /usr/include/bits/types.h \ /usr/include/sys/types.h /usr/include/bits/types.h \
/usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \ /usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \
/usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \ /usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \
@ -11,7 +11,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \
/usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \ /usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \
/usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \ /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
/usr/include/bits/wchar.h /usr/include/gconv.h \ /usr/include/bits/wchar.h /usr/include/gconv.h \
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h \ /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h \
/usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
/usr/include/bits/stdio.h ../src/cmph_types.h /usr/include/bits/stdio.h ../src/cmph_types.h
@ -25,7 +25,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \
/usr/include/gnu/stubs.h: /usr/include/gnu/stubs.h:
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h: /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h:
/usr/include/sys/types.h: /usr/include/sys/types.h:
@ -69,7 +69,7 @@ file_adapter_ex2.o file_adapter_ex2.o: file_adapter_ex2.c ../src/cmph.h \
/usr/include/gconv.h: /usr/include/gconv.h:
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h: /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h:
/usr/include/bits/stdio_lim.h: /usr/include/bits/stdio_lim.h:

View File

@ -1,7 +1,7 @@
vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \
../src/cmph.h /usr/include/stdlib.h /usr/include/features.h \ ../src/cmph.h /usr/include/stdlib.h /usr/include/features.h \
/usr/include/sys/cdefs.h /usr/include/gnu/stubs.h \ /usr/include/sys/cdefs.h /usr/include/gnu/stubs.h \
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h \ /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h \
/usr/include/sys/types.h /usr/include/bits/types.h \ /usr/include/sys/types.h /usr/include/bits/types.h \
/usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \ /usr/include/bits/wordsize.h /usr/include/bits/typesizes.h \
/usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \ /usr/include/time.h /usr/include/endian.h /usr/include/bits/endian.h \
@ -11,7 +11,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \
/usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \ /usr/include/bits/sched.h /usr/include/alloca.h /usr/include/stdio.h \
/usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \ /usr/include/libio.h /usr/include/_G_config.h /usr/include/wchar.h \
/usr/include/bits/wchar.h /usr/include/gconv.h \ /usr/include/bits/wchar.h /usr/include/gconv.h \
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h \ /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h \
/usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \
/usr/include/bits/stdio.h ../src/cmph_types.h /usr/include/bits/stdio.h ../src/cmph_types.h
@ -25,7 +25,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \
/usr/include/gnu/stubs.h: /usr/include/gnu/stubs.h:
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stddef.h: /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stddef.h:
/usr/include/sys/types.h: /usr/include/sys/types.h:
@ -69,7 +69,7 @@ vector_adapter_ex1.o vector_adapter_ex1.o: vector_adapter_ex1.c \
/usr/include/gconv.h: /usr/include/gconv.h:
/usr/lib/gcc-lib/i586-suse-linux/3.3.4/include/stdarg.h: /usr/lib/gcc/i386-redhat-linux/3.4.2/include/stdarg.h:
/usr/include/bits/stdio_lim.h: /usr/include/bits/stdio_lim.h:

View File

@ -1,4 +1,4 @@
# Makefile.in generated by automake 1.9.1 from Makefile.am. # Makefile.in generated by automake 1.9.2 from Makefile.am.
# examples/Makefile. Generated from Makefile.in by configure. # examples/Makefile. Generated from Makefile.in by configure.
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
@ -36,8 +36,8 @@ POST_INSTALL = :
NORMAL_UNINSTALL = : NORMAL_UNINSTALL = :
PRE_UNINSTALL = : PRE_UNINSTALL = :
POST_UNINSTALL = : POST_UNINSTALL = :
build_triplet = i686-suse-linux build_triplet = i686-pc-linux-gnu
host_triplet = i686-suse-linux host_triplet = i686-pc-linux-gnu
noinst_PROGRAMS = vector_adapter_ex1$(EXEEXT) \ noinst_PROGRAMS = vector_adapter_ex1$(EXEEXT) \
file_adapter_ex2$(EXEEXT) file_adapter_ex2$(EXEEXT)
subdir = examples subdir = examples
@ -74,14 +74,14 @@ DIST_SOURCES = $(file_adapter_ex2_SOURCES) \
ETAGS = etags ETAGS = etags
CTAGS = ctags CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run aclocal-1.9 ACLOCAL = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run aclocal-1.9
AMDEP_FALSE = # AMDEP_FALSE = #
AMDEP_TRUE = AMDEP_TRUE =
AMTAR = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run tar AMTAR = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run tar
AR = ar AR = ar
AUTOCONF = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run autoconf AUTOCONF = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run autoconf
AUTOHEADER = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run autoheader AUTOHEADER = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run autoheader
AUTOMAKE = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run automake-1.9 AUTOMAKE = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run automake-1.9
AWK = gawk AWK = gawk
CC = gcc CC = gcc
CCDEPMODE = depmode=gcc3 CCDEPMODE = depmode=gcc3
@ -89,9 +89,9 @@ CFLAGS = -g -O2 -D_FILE_OFFSET_BITS=64
CPP = gcc -E CPP = gcc -E
CPPFLAGS = CPPFLAGS =
CXX = g++ CXX = g++
CXXCPP = /lib/cpp CXXCPP = g++ -E
CXXDEPMODE = depmode=none CXXDEPMODE = depmode=gcc3
CXXFLAGS = CXXFLAGS = -g -O2
CYGPATH_W = echo CYGPATH_W = echo
DEFS = -DHAVE_CONFIG_H DEFS = -DHAVE_CONFIG_H
DEPDIR = .deps DEPDIR = .deps
@ -101,8 +101,8 @@ ECHO_N = -n
ECHO_T = ECHO_T =
EGREP = grep -E EGREP = grep -E
EXEEXT = EXEEXT =
F77 = F77 = g77
FFLAGS = FFLAGS = -g -O2
GETCONF = getconf GETCONF = getconf
INSTALL_DATA = ${INSTALL} -m 644 INSTALL_DATA = ${INSTALL} -m 644
INSTALL_PROGRAM = ${INSTALL} INSTALL_PROGRAM = ${INSTALL}
@ -114,7 +114,7 @@ LIBS =
LIBTOOL = $(SHELL) $(top_builddir)/libtool LIBTOOL = $(SHELL) $(top_builddir)/libtool
LN_S = ln -s LN_S = ln -s
LTLIBOBJS = LTLIBOBJS =
MAKEINFO = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/cmph/missing --run makeinfo MAKEINFO = ${SHELL} /home/fbotelho/doutorado/algoritmos/cmph/missing --run makeinfo
OBJEXT = o OBJEXT = o
PACKAGE = cmph PACKAGE = cmph
PACKAGE_BUGREPORT = PACKAGE_BUGREPORT =
@ -131,35 +131,35 @@ VERSION = 0.3
ac_ct_AR = ar ac_ct_AR = ar
ac_ct_CC = gcc ac_ct_CC = gcc
ac_ct_CXX = g++ ac_ct_CXX = g++
ac_ct_F77 = ac_ct_F77 = g77
ac_ct_GETCONF = getconf ac_ct_GETCONF = getconf
ac_ct_RANLIB = ranlib ac_ct_RANLIB = ranlib
ac_ct_STRIP = strip ac_ct_STRIP = strip
am__fastdepCC_FALSE = # am__fastdepCC_FALSE = #
am__fastdepCC_TRUE = am__fastdepCC_TRUE =
am__fastdepCXX_FALSE = am__fastdepCXX_FALSE = #
am__fastdepCXX_TRUE = # am__fastdepCXX_TRUE =
am__include = include am__include = include
am__leading_dot = . am__leading_dot = .
am__quote = am__quote =
am__tar = ${AMTAR} chof - "$$tardir" am__tar = ${AMTAR} chof - "$$tardir"
am__untar = ${AMTAR} xf - am__untar = ${AMTAR} xf -
bindir = ${exec_prefix}/bin bindir = ${exec_prefix}/bin
build = i686-suse-linux build = i686-pc-linux-gnu
build_alias = build_alias =
build_cpu = i686 build_cpu = i686
build_os = linux build_os = linux-gnu
build_vendor = suse build_vendor = pc
datadir = ${prefix}/share datadir = ${prefix}/share
exec_prefix = ${prefix} exec_prefix = ${prefix}
host = i686-suse-linux host = i686-pc-linux-gnu
host_alias = host_alias =
host_cpu = i686 host_cpu = i686
host_os = linux host_os = linux-gnu
host_vendor = suse host_vendor = pc
includedir = ${prefix}/include includedir = ${prefix}/include
infodir = ${prefix}/info infodir = ${prefix}/info
install_sh = /home/fbotelho/doutorado/algoritmos/cmph/cmph/install-sh install_sh = /home/fbotelho/doutorado/algoritmos/cmph/install-sh
libdir = ${exec_prefix}/lib libdir = ${exec_prefix}/lib
libexecdir = ${exec_prefix}/libexec libexecdir = ${exec_prefix}/libexec
localstatedir = ${prefix}/var localstatedir = ${prefix}/var

View File

@ -1,4 +1,4 @@
# Makefile.in generated by automake 1.9.1 from Makefile.am. # Makefile.in generated by automake 1.9.2 from Makefile.am.
# @configure_input@ # @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,

View File

@ -15,13 +15,6 @@
//#define DEBUG //#define DEBUG
#include "debug.h" #include "debug.h"
//static cmph_uint32 UNDEFINED = UINT_MAX;
/* static const char bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */
/* #define GETBIT(array, i) (array[(i) / 8] & bitmask[(i) % 8]) */
/* #define SETBIT(array, i) (array[(i) / 8] |= bitmask[(i) % 8]) */
/* #define UNSETBIT(array, i) (array[(i) / 8] &= (~(bitmask[(i) % 8]))) */
static int bmz_gen_edges(cmph_config_t *mph); static int bmz_gen_edges(cmph_config_t *mph);
static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited); static cmph_uint8 bmz_traverse_critical_nodes(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited); static cmph_uint8 bmz_traverse_critical_nodes_heuristic(bmz_config_data_t *bmz, cmph_uint32 v, cmph_uint32 * biggest_g_value, cmph_uint32 * biggest_edge_value, cmph_uint8 * used_edges, cmph_uint8 * visited);
@ -535,7 +528,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
if (h1 == h2 && ++h2 > bmz->n) h2 = 0; if (h1 == h2 && ++h2 > bmz->n) h2 = 0;
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m); DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
return (bmz->g[h1] + bmz->g[h2]); return bmz->g[h1] + bmz->g[h2];
} }
void bmz_destroy(cmph_t *mphf) void bmz_destroy(cmph_t *mphf)
{ {

View File

@ -375,9 +375,10 @@ static void bmz8_traverse(bmz8_config_data_t *bmz8, cmph_uint8 * used_edges, cmp
while((neighbor = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR) while((neighbor = graph_next_neighbor(bmz8->graph, &it)) != GRAPH_NO_NEIGHBOR)
{ {
if(GETBIT(visited,neighbor)) continue; if(GETBIT(visited,neighbor)) continue;
DEBUGP("Visiting neighbor %u\n", neighbor); //DEBUGP("Visiting neighbor %u\n", neighbor);
*unused_edge_index = next_unused_edge(bmz8, used_edges, *unused_edge_index); *unused_edge_index = next_unused_edge(bmz8, used_edges, *unused_edge_index);
bmz8->g[neighbor] = *unused_edge_index - bmz8->g[v]; bmz8->g[neighbor] = *unused_edge_index - bmz8->g[v];
//if (bmz8->g[neighbor] >= bmz8->m) bmz8->g[neighbor] += bmz8->m;
SETBIT(visited, neighbor); SETBIT(visited, neighbor);
(*unused_edge_index)++; (*unused_edge_index)++;
bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited); bmz8_traverse(bmz8, used_edges, neighbor, unused_edge_index, visited);
@ -437,7 +438,7 @@ static int bmz8_gen_edges(cmph_config_t *mph)
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
return 0; return 0;
} }
DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key); //DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
// fprintf(stderr, "key = %s -- dispose BMZ\n", key); // fprintf(stderr, "key = %s -- dispose BMZ\n", key);
multiple_edges = graph_contains_edge(bmz8->graph, h1, h2); multiple_edges = graph_contains_edge(bmz8->graph, h1, h2);

131
src/brz.c
View File

@ -1,7 +1,7 @@
#include "graph.h" #include "graph.h"
#include "bmz.h" #include "bmz8.h"
#include "bmz_structs.h" #include "bmz8_structs.h"
#include "brz.h" #include "brz.h"
#include "cmph_structs.h" #include "cmph_structs.h"
#include "brz_structs.h" #include "brz_structs.h"
@ -22,7 +22,7 @@ static int brz_gen_graphs(cmph_config_t *mph);
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n); static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static char * brz_read_key(FILE * fd); static char * brz_read_key(FILE * fd);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source);
brz_config_data_t *brz_config_new() brz_config_data_t *brz_config_new()
{ {
@ -37,6 +37,7 @@ brz_config_data_t *brz_config_new()
brz->h1 = NULL; brz->h1 = NULL;
brz->h2 = NULL; brz->h2 = NULL;
brz->h3 = NULL; brz->h3 = NULL;
brz->memory_availability = 1024*1024;
brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8)); brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8));
strcpy(brz->tmp_dir, "/var/tmp/\0"); strcpy(brz->tmp_dir, "/var/tmp/\0");
assert(brz); assert(brz);
@ -64,6 +65,12 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
} }
} }
/*
 * Sets the size of the key buffer that brz_gen_graphs() allocates with
 * malloc(brz->memory_availability).
 *
 * mph:                 configuration object; its `data` member is used as a
 *                      brz_config_data_t.
 * memory_availability: requested buffer size, in megabytes.
 *
 * The size is stored in bytes in a cmph_uint32 field, so a plain
 * `memory_availability*1024*1024` wraps around for large requests (with a
 * 32-bit cmph_uint32, 4096 MB would become 0 bytes). Requests that cannot be
 * represented are therefore clamped to the largest whole number of megabytes
 * that fits, instead of silently turning into a tiny or empty buffer.
 */
void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
{
	const cmph_uint32 bytes_per_mb = 1024*1024;
	/* (cmph_uint32)-1 is the type's maximum, assuming cmph_uint32 is unsigned. */
	const cmph_uint32 max_mb = ((cmph_uint32)-1) / bytes_per_mb;
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;

	if (memory_availability > max_mb) memory_availability = max_mb;
	brz->memory_availability = memory_availability * bytes_per_mb;
}
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{ {
brz_config_data_t *brz = (brz_config_data_t *)mph->data; brz_config_data_t *brz = (brz_config_data_t *)mph->data;
@ -84,73 +91,6 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
} }
} }
// static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
// {
// cmph_uint8 * hashtable = NULL;
// cmph_uint32 i;
// hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
// source->rewind(source->data);
// memset(hashtable, 0, source->nkeys);
// //check all keys
// for (i = 0; i < source->nkeys; ++i)
// {
// cmph_uint32 h;
// char *buf;
// cmph_uint32 buflen = 0;
// source->read(source->data, &buf, &buflen);
// h = cmph_search(mphf, buf, buflen);
// if(hashtable[h])
// {
// fprintf(stderr, "collision: %u\n",h);
// return 0;
// }
// //assert(hashtable[h]==0);
// hashtable[h] = 1;
// source->dispose(source->data, buf, buflen);
// }
// fprintf(stderr, "\n===============================================================================\n");
// free(hashtable);
// return 1;
// }
//
// static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
// {
// cmph_uint8 * hashtable = NULL;
// cmph_uint32 i;
// hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
// source->rewind(source->data);
// //memset(hashtable, 0, source->nkeys);
// //check all keys
// for (i = 0; i < source->nkeys; ++i)
// {
// cmph_uint32 h1_v;
// cmph_uint32 h2_v;
// cmph_uint32 h;
// char *buf;
// cmph_uint32 buflen = 0;
// source->read(source->data, &buf, &buflen);
//
// h1_v = hash(h1, buf, buflen) % n;
//
// h2_v = hash(h2, buf, buflen) % n;
//
// if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;
//
// h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;
//
// if(hashtable[h])
// {
// fprintf(stderr, "collision: %u\n",h);
// return 0;
// }
// //assert(hashtable[h]==0);
// hashtable[h] = 1;
// source->dispose(source->data, buf, buflen);
//
// }
// free(hashtable);
// return 1;
// }
cmph_t *brz_new(cmph_config_t *mph, float c) cmph_t *brz_new(cmph_config_t *mph, float c)
{ {
@ -244,13 +184,12 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
static int brz_gen_graphs(cmph_config_t *mph) static int brz_gen_graphs(cmph_config_t *mph)
{ {
#pragma pack(1)
cmph_uint32 i, e; cmph_uint32 i, e;
brz_config_data_t *brz = (brz_config_data_t *)mph->data; brz_config_data_t *brz = (brz_config_data_t *)mph->data;
cmph_uint32 memory_availability = 209715200;//200MB //104857600;//100MB //524288000; // 500MB //209715200; // 200 MB //cmph_uint32 memory_availability = 200*1024*1024;
cmph_uint32 memory_usage = 0; cmph_uint32 memory_usage = 0;
cmph_uint32 nkeys_in_buffer = 0; cmph_uint32 nkeys_in_buffer = 0;
cmph_uint8 *buffer = (cmph_uint8 *)malloc(memory_availability); cmph_uint8 *buffer = (cmph_uint8 *)malloc(brz->memory_availability);
cmph_uint32 *buckets_size = (cmph_uint32 *)calloc(brz->k, sizeof(cmph_uint32)); cmph_uint32 *buckets_size = (cmph_uint32 *)calloc(brz->k, sizeof(cmph_uint32));
cmph_uint32 *keys_index = NULL; cmph_uint32 *keys_index = NULL;
cmph_uint8 **buffer_merge = NULL; cmph_uint8 **buffer_merge = NULL;
@ -276,7 +215,7 @@ static int brz_gen_graphs(cmph_config_t *mph)
mph->key_source->read(mph->key_source->data, &key, &keylen); mph->key_source->read(mph->key_source->data, &key, &keylen);
/* Buffers management */ /* Buffers management */
if (memory_usage + keylen + 1 > memory_availability) // flush buffers if (memory_usage + keylen + 1 > brz->memory_availability) // flush buffers
{ {
if(mph->verbosity) if(mph->verbosity)
{ {
@ -305,7 +244,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
buckets_size[h3]++; buckets_size[h3]++;
memory_usage = memory_usage + keylen1 + 1; memory_usage = memory_usage + keylen1 + 1;
} }
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb"); tmp_fd = fopen(filename, "wb");
@ -323,11 +261,10 @@ static int brz_gen_graphs(cmph_config_t *mph)
free(keys_index); free(keys_index);
fclose(tmp_fd); fclose(tmp_fd);
} }
//fprintf(stderr, "Storing read Key\n");
memcpy(buffer + memory_usage, key, keylen + 1); memcpy(buffer + memory_usage, key, keylen + 1);
memory_usage = memory_usage + keylen + 1; memory_usage = memory_usage + keylen + 1;
h3 = hash(brz->h3, key, keylen) % brz->k; h3 = hash(brz->h3, key, keylen) % brz->k;
if (brz->size[h3] == MAX_BUCKET_SIZE) if ((brz->size[h3] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h3]) < brz->size[h3]))
{ {
free(buffer); free(buffer);
free(buckets_size); free(buckets_size);
@ -367,8 +304,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
buckets_size[h3]++; buckets_size[h3]++;
memory_usage = memory_usage + keylen1 + 1; memory_usage = memory_usage + keylen1 + 1;
} }
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb"); tmp_fd = fopen(filename, "wb");
@ -401,8 +336,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
for(i = 0; i < nflushes; i++) for(i = 0; i < nflushes; i++)
{ {
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i);
// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i);
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, i); sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
tmp_fds[i] = fopen(filename, "rb"); tmp_fds[i] = fopen(filename, "rb");
@ -420,7 +353,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
e = 0; e = 0;
keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *)); keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *));
nkeys_vd = 0; nkeys_vd = 0;
//buffer = (cmph_uint8 *)malloc(memory_availability);
while(e < brz->m) while(e < brz->m)
{ {
i = brz_min_index(buffer_h3, nflushes); i = brz_min_index(buffer_h3, nflushes);
@ -436,56 +368,43 @@ static int brz_gen_graphs(cmph_config_t *mph)
if (h3 != buffer_h3[i]) break; if (h3 != buffer_h3[i]) break;
keys_vd[nkeys_vd++] = key; keys_vd[nkeys_vd++] = key;
//save_in_disk(buffer, key, keylen, &memory_usage, memory_availability, graphs_fd);
//fwrite(key, 1, keylen + 1, graphs_fd);
e++; e++;
//free(key);
key = brz_read_key(tmp_fds[i]); key = brz_read_key(tmp_fds[i]);
} }
if (key) if (key)
{ {
//save_in_disk(buffer, buffer_merge[i], strlen(buffer_merge[i]), &memory_usage, memory_availability, graphs_fd);
assert(nkeys_vd < brz->size[cur_bucket]); assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = buffer_merge[i]; keys_vd[nkeys_vd++] = buffer_merge[i];
//fwrite(buffer_merge[i], 1, strlen(buffer_merge[i]) + 1, graphs_fd);
e++; e++;
buffer_h3[i] = h3; buffer_h3[i] = h3;
//free(buffer_merge[i]);
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8)); buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1); memcpy(buffer_merge[i], key, keylen + 1);
free(key); free(key);
} }
} }
/* fprintf(stderr, "BOSTA %u %u e: %u\n", i, buffer_h3[i], e);*/
if(!key) if(!key)
{ {
assert(nkeys_vd < brz->size[cur_bucket]); assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = buffer_merge[i]; keys_vd[nkeys_vd++] = buffer_merge[i];
//save_in_disk(buffer, buffer_merge[i], strlen(buffer_merge[i]), &memory_usage, memory_availability, graphs_fd);
//fwrite(buffer_merge[i], 1, strlen(buffer_merge[i]) + 1, graphs_fd);
e++; e++;
buffer_h3[i] = UINT_MAX; buffer_h3[i] = UINT_MAX;
//free(buffer_merge[i]);
buffer_merge[i] = NULL; buffer_merge[i] = NULL;
} }
if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf. if(nkeys_vd == brz->size[cur_bucket]) // Generating mphf for each bucket.
{ {
cmph_io_adapter_t *source = NULL; cmph_io_adapter_t *source = NULL;
cmph_config_t *config = NULL; cmph_config_t *config = NULL;
cmph_t *mphf_tmp = NULL; cmph_t *mphf_tmp = NULL;
bmz_data_t * bmzf = NULL; bmz8_data_t * bmzf = NULL;
// Source of keys // Source of keys
//fprintf(stderr, "Generating mphf %u in %u \n",cur_bucket + 1, brz->k);
if(nkeys_vd > max_size) max_size = nkeys_vd; if(nkeys_vd > max_size) max_size = nkeys_vd;
source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd); source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
config = cmph_config_new(source); config = cmph_config_new(source);
cmph_config_set_algo(config, CMPH_BMZ); cmph_config_set_algo(config, CMPH_BMZ8);
cmph_config_set_graphsize(config, brz->c); cmph_config_set_graphsize(config, brz->c);
mphf_tmp = cmph_new(config); mphf_tmp = cmph_new(config);
bmzf = (bmz_data_t *)mphf_tmp->data; bmzf = (bmz8_data_t *)mphf_tmp->data;
//assert(brz_verify_mphf(mphf_tmp, source));
brz_copy_partial_mphf(brz, bmzf, cur_bucket, source); brz_copy_partial_mphf(brz, bmzf, cur_bucket, source);
cmph_config_destroy(config); cmph_config_destroy(config);
brz_destroy_keys_vd(keys_vd, nkeys_vd); brz_destroy_keys_vd(keys_vd, nkeys_vd);
@ -495,14 +414,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
} }
} }
for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]); for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]);
//flush_buffer(buffer, &memory_usage, graphs_fd);
free(tmp_fds); free(tmp_fds);
free(keys_vd); free(keys_vd);
free(buffer_merge); free(buffer_merge);
free(buffer_h3); free(buffer_h3);
fprintf(stderr, "Maximal Size: %u\n", max_size); fprintf(stderr, "Maximal Size: %u\n", max_size);
return 1; return 1;
#pragma pack()
} }
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n) static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
@ -541,7 +458,7 @@ static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
for(i = 0; i < nkeys; i++) free(keys_vd[i]); for(i = 0; i < nkeys; i++) free(keys_vd[i]);
} }
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source) static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source)
{ {
cmph_uint32 i; cmph_uint32 i;
cmph_uint32 n = ceil(brz->c * brz->size[index]); cmph_uint32 n = ceil(brz->c * brz->size[index]);
@ -549,13 +466,11 @@ static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmp
brz->g[index] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8)); brz->g[index] = (cmph_uint8 *)calloc(n, sizeof(cmph_uint8));
for(i = 0; i < n; i++) for(i = 0; i < n; i++)
{ {
brz->g[index][i] = (cmph_uint8) bmzf->g[i]; brz->g[index][i] = bmzf->g[i];
//fprintf(stderr, "gsrc[%u]: %u gdest: %u\n", i, (cmph_uint8) bmzf->g[i], brz->g[index][i]); //fprintf(stderr, "gsrc[%u]: %u gdest: %u\n", i, (cmph_uint8) bmzf->g[i], brz->g[index][i]);
} }
brz->h1[index] = hash_state_copy(bmzf->hashes[0]); brz->h1[index] = hash_state_copy(bmzf->hashes[0]);
brz->h2[index] = hash_state_copy(bmzf->hashes[1]); brz->h2[index] = hash_state_copy(bmzf->hashes[1]);
//brz->size[index] = bmzf->n;
//assert(brz_verify_mphf1(brz->h1[index], brz->h2[index], brz->g[index], n, source));
} }
int brz_dump(cmph_t *mphf, FILE *fd) int brz_dump(cmph_t *mphf, FILE *fd)
@ -675,11 +590,13 @@ cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
cmph_uint32 n = ceil(brz->c * m); cmph_uint32 n = ceil(brz->c * m);
cmph_uint32 h1 = hash(brz->h1[h3], key, keylen) % n; cmph_uint32 h1 = hash(brz->h1[h3], key, keylen) % n;
cmph_uint32 h2 = hash(brz->h2[h3], key, keylen) % n; cmph_uint32 h2 = hash(brz->h2[h3], key, keylen) % n;
cmph_uint8 mphf_bucket;
if (h1 == h2 && ++h2 >= n) h2 = 0; if (h1 == h2 && ++h2 >= n) h2 = 0;
mphf_bucket = brz->g[h3][h1] + brz->g[h3][h2];
DEBUGP("key: %s h1: %u h2: %u h3: %u\n", key, h1, h2, h3); DEBUGP("key: %s h1: %u h2: %u h3: %u\n", key, h1, h2, h3);
DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h3]: %u edges: %u\n", key, brz->g[h3][h1], brz->g[h3][h2], brz->offset[h3], brz->m); DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h3]: %u edges: %u\n", key, brz->g[h3][h1], brz->g[h3][h2], brz->offset[h3], brz->m);
DEBUGP("Address: %u\n", (((cmph_uint32)brz->g[h3][h1] + (cmph_uint32)brz->g[h3][h2])% m + brz->offset[h3])); DEBUGP("Address: %u\n", mphf_bucket + brz->offset[h3]);
return (((cmph_uint32)brz->g[h3][h1] + (cmph_uint32)brz->g[h3][h2])% m + brz->offset[h3]); return (mphf_bucket + brz->offset[h3]);
} }
void brz_destroy(cmph_t *mphf) void brz_destroy(cmph_t *mphf)
{ {

View File

@ -28,6 +28,7 @@ struct __brz_config_data_t
hash_state_t **h1; hash_state_t **h1;
hash_state_t **h2; hash_state_t **h2;
hash_state_t * h3; hash_state_t * h3;
cmph_uint32 memory_availability;
cmph_uint8 * tmp_dir; // temporary directory cmph_uint8 * tmp_dir; // temporary directory
}; };

View File

@ -173,10 +173,11 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
mph->data = bmz_config_new(); mph->data = bmz_config_new();
break; break;
case CMPH_BMZ8: case CMPH_BMZ8:
mph->data = (void*)bmz8_config_new(); mph->data = bmz8_config_new();
break; break;
case CMPH_BRZ: case CMPH_BRZ:
mph->data = brz_config_new(); mph->data = brz_config_new();
break;
default: default:
assert(0); assert(0);
} }
@ -219,6 +220,7 @@ void cmph_config_destroy(cmph_config_t *mph)
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
brz_config_destroy(mph); brz_config_destroy(mph);
break;
default: default:
assert(0); assert(0);
} }