Merge branch 'master' of ssh://cmph.git.sourceforge.net/gitroot/cmph/cmph

2012-03-20 21:50:46 -07:00
parent bf98b6eaf1 e760465fca
commit 14fda50f8f
65 changed files with 3769 additions and 1049 deletions
--- a/2
+++ b/2
@@ -1,3 +1,5 @@
 The code of the cmph library is dual licensed under the LGPL version 2 and MPL
 1.1 licenses. Please refer to the LGPL-2 and MPL-1.1 files in the repository
 for the full description of each of the licenses.
+
+For cxxmph, the files stringpiece.h and MurmurHash2 are covered by the BSD and MIT licenses, respectively.
--- a/365
+++ b/365
@@ -1,365 +0,0 @@
-Installation Instructions
-*************************
-
-Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
-2006, 2007, 2008, 2009 Free Software Foundation, Inc.
-
-   Copying and distribution of this file, with or without modification,
-are permitted in any medium without royalty provided the copyright
-notice and this notice are preserved.  This file is offered as-is,
-without warranty of any kind.
-
-Basic Installation
-==================
-
-   Briefly, the shell commands `./configure; make; make install' should
-configure, build, and install this package.  The following
-more-detailed instructions are generic; see the `README' file for
-instructions specific to this package.  Some packages provide this
-`INSTALL' file but do not implement all of the features documented
-below.  The lack of an optional feature in a given package is not
-necessarily a bug.  More recommendations for GNU packages can be found
-in *note Makefile Conventions: (standards)Makefile Conventions.
-
-   The `configure' shell script attempts to guess correct values for
-various system-dependent variables used during compilation.  It uses
-those values to create a `Makefile' in each directory of the package.
-It may also create one or more `.h' files containing system-dependent
-definitions.  Finally, it creates a shell script `config.status' that
-you can run in the future to recreate the current configuration, and a
-file `config.log' containing compiler output (useful mainly for
-debugging `configure').
-
-   It can also use an optional file (typically called `config.cache'
-and enabled with `--cache-file=config.cache' or simply `-C') that saves
-the results of its tests to speed up reconfiguring.  Caching is
-disabled by default to prevent problems with accidental use of stale
-cache files.
-
-   If you need to do unusual things to compile the package, please try
-to figure out how `configure' could check whether to do them, and mail
-diffs or instructions to the address given in the `README' so they can
-be considered for the next release.  If you are using the cache, and at
-some point `config.cache' contains results you don't want to keep, you
-may remove or edit it.
-
-   The file `configure.ac' (or `configure.in') is used to create
-`configure' by a program called `autoconf'.  You need `configure.ac' if
-you want to change it or regenerate `configure' using a newer version
-of `autoconf'.
-
-   The simplest way to compile this package is:
-
-  1. `cd' to the directory containing the package's source code and type
-     `./configure' to configure the package for your system.
-
-     Running `configure' might take a while.  While running, it prints
-     some messages telling which features it is checking for.
-
-  2. Type `make' to compile the package.
-
-  3. Optionally, type `make check' to run any self-tests that come with
-     the package, generally using the just-built uninstalled binaries.
-
-  4. Type `make install' to install the programs and any data files and
-     documentation.  When installing into a prefix owned by root, it is
-     recommended that the package be configured and built as a regular
-     user, and only the `make install' phase executed with root
-     privileges.
-
-  5. Optionally, type `make installcheck' to repeat any self-tests, but
-     this time using the binaries in their final installed location.
-     This target does not install anything.  Running this target as a
-     regular user, particularly if the prior `make install' required
-     root privileges, verifies that the installation completed
-     correctly.
-
-  6. You can remove the program binaries and object files from the
-     source code directory by typing `make clean'.  To also remove the
-     files that `configure' created (so you can compile the package for
-     a different kind of computer), type `make distclean'.  There is
-     also a `make maintainer-clean' target, but that is intended mainly
-     for the package's developers.  If you use it, you may have to get
-     all sorts of other programs in order to regenerate files that came
-     with the distribution.
-
-  7. Often, you can also type `make uninstall' to remove the installed
-     files again.  In practice, not all packages have tested that
-     uninstallation works correctly, even though it is required by the
-     GNU Coding Standards.
-
-  8. Some packages, particularly those that use Automake, provide `make
-     distcheck', which can by used by developers to test that all other
-     targets like `make install' and `make uninstall' work correctly.
-     This target is generally not run by end users.
-
-Compilers and Options
-=====================
-
-   Some systems require unusual options for compilation or linking that
-the `configure' script does not know about.  Run `./configure --help'
-for details on some of the pertinent environment variables.
-
-   You can give `configure' initial values for configuration parameters
-by setting variables in the command line or in the environment.  Here
-is an example:
-
-     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
-
-   *Note Defining Variables::, for more details.
-
-Compiling For Multiple Architectures
-====================================
-
-   You can compile the package for more than one kind of computer at the
-same time, by placing the object files for each architecture in their
-own directory.  To do this, you can use GNU `make'.  `cd' to the
-directory where you want the object files and executables to go and run
-the `configure' script.  `configure' automatically checks for the
-source code in the directory that `configure' is in and in `..'.  This
-is known as a "VPATH" build.
-
-   With a non-GNU `make', it is safer to compile the package for one
-architecture at a time in the source code directory.  After you have
-installed the package for one architecture, use `make distclean' before
-reconfiguring for another architecture.
-
-   On MacOS X 10.5 and later systems, you can create libraries and
-executables that work on multiple system types--known as "fat" or
-"universal" binaries--by specifying multiple `-arch' options to the
-compiler but only a single `-arch' option to the preprocessor.  Like
-this:
-
-     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
-                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
-                 CPP="gcc -E" CXXCPP="g++ -E"
-
-   This is not guaranteed to produce working output in all cases, you
-may have to build one architecture at a time and combine the results
-using the `lipo' tool if you have problems.
-
-Installation Names
-==================
-
-   By default, `make install' installs the package's commands under
-`/usr/local/bin', include files under `/usr/local/include', etc.  You
-can specify an installation prefix other than `/usr/local' by giving
-`configure' the option `--prefix=PREFIX', where PREFIX must be an
-absolute file name.
-
-   You can specify separate installation prefixes for
-architecture-specific files and architecture-independent files.  If you
-pass the option `--exec-prefix=PREFIX' to `configure', the package uses
-PREFIX as the prefix for installing programs and libraries.
-Documentation and other data files still use the regular prefix.
-
-   In addition, if you use an unusual directory layout you can give
-options like `--bindir=DIR' to specify different values for particular
-kinds of files.  Run `configure --help' for a list of the directories
-you can set and what kinds of files go in them.  In general, the
-default for these options is expressed in terms of `${prefix}', so that
-specifying just `--prefix' will affect all of the other directory
-specifications that were not explicitly provided.
-
-   The most portable way to affect installation locations is to pass the
-correct locations to `configure'; however, many packages provide one or
-both of the following shortcuts of passing variable assignments to the
-`make install' command line to change installation locations without
-having to reconfigure or recompile.
-
-   The first method involves providing an override variable for each
-affected directory.  For example, `make install
-prefix=/alternate/directory' will choose an alternate location for all
-directory configuration variables that were expressed in terms of
-`${prefix}'.  Any directories that were specified during `configure',
-but not in terms of `${prefix}', must each be overridden at install
-time for the entire installation to be relocated.  The approach of
-makefile variable overrides for each directory variable is required by
-the GNU Coding Standards, and ideally causes no recompilation.
-However, some platforms have known limitations with the semantics of
-shared libraries that end up requiring recompilation when using this
-method, particularly noticeable in packages that use GNU Libtool.
-
-   The second method involves providing the `DESTDIR' variable.  For
-example, `make install DESTDIR=/alternate/directory' will prepend
-`/alternate/directory' before all installation names.  The approach of
-`DESTDIR' overrides is not required by the GNU Coding Standards, and
-does not work on platforms that have drive letters.  On the other hand,
-it does better at avoiding recompilation issues, and works well even
-when some directory options were not specified in terms of `${prefix}'
-at `configure' time.
-
-Optional Features
-=================
-
-   If the package supports it, you can cause programs to be installed
-with an extra prefix or suffix on their names by giving `configure' the
-option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
-
-   Some packages pay attention to `--enable-FEATURE' options to
-`configure', where FEATURE indicates an optional part of the package.
-They may also pay attention to `--with-PACKAGE' options, where PACKAGE
-is something like `gnu-as' or `x' (for the X Window System).  The
-`README' should mention any `--enable-' and `--with-' options that the
-package recognizes.
-
-   For packages that use the X Window System, `configure' can usually
-find the X include and library files automatically, but if it doesn't,
-you can use the `configure' options `--x-includes=DIR' and
-`--x-libraries=DIR' to specify their locations.
-
-   Some packages offer the ability to configure how verbose the
-execution of `make' will be.  For these packages, running `./configure
--enable-silent-rules' sets the default to minimal output, which can be
-overridden with `make V=1'; while running `./configure
--disable-silent-rules' sets the default to verbose, which can be
-overridden with `make V=0'.
-
-Particular systems
-==================
-
-   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
-CC is not installed, it is recommended to use the following options in
-order to use an ANSI C compiler:
-
-     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
-
-and if that doesn't work, install pre-built binaries of GCC for HP-UX.
-
-   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
-parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
-a workaround.  If GNU CC is not installed, it is therefore recommended
-to try
-
-     ./configure CC="cc"
-
-and if that doesn't work, try
-
-     ./configure CC="cc -nodtk"
-
-   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
-directory contains several dysfunctional programs; working variants of
-these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
-in your `PATH', put it _after_ `/usr/bin'.
-
-   On Haiku, software installed for all users goes in `/boot/common',
-not `/usr/local'.  It is recommended to use the following options:
-
-     ./configure --prefix=/boot/common
-
-Specifying the System Type
-==========================
-
-   There may be some features `configure' cannot figure out
-automatically, but needs to determine by the type of machine the package
-will run on.  Usually, assuming the package is built to be run on the
-_same_ architectures, `configure' can figure that out, but if it prints
-a message saying it cannot guess the machine type, give it the
-`--build=TYPE' option.  TYPE can either be a short name for the system
-type, such as `sun4', or a canonical name which has the form:
-
-     CPU-COMPANY-SYSTEM
-
-where SYSTEM can have one of these forms:
-
-     OS
-     KERNEL-OS
-
-   See the file `config.sub' for the possible values of each field.  If
-`config.sub' isn't included in this package, then this package doesn't
-need to know the machine type.
-
-   If you are _building_ compiler tools for cross-compiling, you should
-use the option `--target=TYPE' to select the type of system they will
-produce code for.
-
-   If you want to _use_ a cross compiler, that generates code for a
-platform different from the build platform, you should specify the
-"host" platform (i.e., that on which the generated programs will
-eventually be run) with `--host=TYPE'.
-
-Sharing Defaults
-================
-
-   If you want to set default values for `configure' scripts to share,
-you can create a site shell script called `config.site' that gives
-default values for variables like `CC', `cache_file', and `prefix'.
-`configure' looks for `PREFIX/share/config.site' if it exists, then
-`PREFIX/etc/config.site' if it exists.  Or, you can set the
-`CONFIG_SITE' environment variable to the location of the site script.
-A warning: not all `configure' scripts look for a site script.
-
-Defining Variables
-==================
-
-   Variables not defined in a site shell script can be set in the
-environment passed to `configure'.  However, some packages may run
-configure again during the build, and the customized values of these
-variables may be lost.  In order to avoid this problem, you should set
-them in the `configure' command line, using `VAR=value'.  For example:
-
-     ./configure CC=/usr/local2/bin/gcc
-
-causes the specified `gcc' to be used as the C compiler (unless it is
-overridden in the site shell script).
-
-Unfortunately, this technique does not work for `CONFIG_SHELL' due to
-an Autoconf bug.  Until the bug is fixed you can use this workaround:
-
-     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
-
-`configure' Invocation
-======================
-
-   `configure' recognizes the following options to control how it
-operates.
-
-`--help'
-`-h'
-     Print a summary of all of the options to `configure', and exit.
-
-`--help=short'
-`--help=recursive'
-     Print a summary of the options unique to this package's
-     `configure', and exit.  The `short' variant lists options used
-     only in the top level, while the `recursive' variant lists options
-     also present in any nested packages.
-
-`--version'
-`-V'
-     Print the version of Autoconf used to generate the `configure'
-     script, and exit.
-
-`--cache-file=FILE'
-     Enable the cache: use and save the results of the tests in FILE,
-     traditionally `config.cache'.  FILE defaults to `/dev/null' to
-     disable caching.
-
-`--config-cache'
-`-C'
-     Alias for `--cache-file=config.cache'.
-
-`--quiet'
-`--silent'
-`-q'
-     Do not print messages saying which checks are being made.  To
-     suppress all normal output, redirect it to `/dev/null' (any error
-     messages will still be shown).
-
-`--srcdir=DIR'
-     Look for the package's source code in directory DIR.  Usually
-     `configure' can determine that directory automatically.
-
-`--prefix=DIR'
-     Use DIR as the installation prefix.  *note Installation Names::
-     for more details, including other options available for fine-tuning
-     the installation locations.
-
-`--no-create'
-`-n'
-     Run the configure checks, but stop before creating any output
-     files.
-
-`configure' also accepts some other, not widely useful, options.  Run
-`configure --help' for more details.
-
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = src tests examples man
+SUBDIRS = src tests examples man $(CXXMPH)
 EXTRA_DIST = cmph.spec configure.ac cmph.pc.in LGPL-2 MPL-1.1

 pkgconfigdir = $(libdir)/pkgconfig
--- a/NEWSLOG.t2t
+++ b/NEWSLOG.t2t
@@ -5,6 +5,10 @@ News Log

 ----------------------------------------

+==News for version 1.1==
+
+Fixed a bug in the chd_pc algorithm and reorganized tests.
+
 ==News for version 1.0==

 This is a bugfix only version, after which a revamp of the cmph code and
--- a/README.t2t
+++ b/README.t2t
@@ -88,6 +88,10 @@ The CMPH Library encapsulates the newest and more efficient algorithms in an eas

 ----------------------------------------

+==News for version 1.1==
+
+Fixed a bug in the chd_pc algorithm and reorganized tests.
+
 ==News for version 1.0==

 This is a bugfix only version, after which a revamp of the cmph code and
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -1,9 +1,95 @@
+AC_DEFUN([AC_ENABLE_CXXMPH], [AC_ARG_ENABLE([cxxmph],
+	[  --enable-cxxmph	enable the c++ cxxmph library ],
+	[case "${enableval}" in
+		yes) cxxmph=true ;;
+		no)  cxxmph=false ;;
+		*) AC_MSG_ERROR([bad value ${enableval} for --enable-cxxmph]) ;;
+	esac],[cxxmph=false])])
+
 AC_DEFUN([AC_CHECK_SPOON], [
 	AC_ARG_WITH(spoon, [  --with-spoon=SPOON this is inocuous, since the truth is that there is no spoon ])
 	AC_MSG_CHECKING(if there is spoon)
 	AC_MSG_RESULT(no)
 ])

+dnl Check for baseline language coverage in the compiler for the C++0x standard.
+# AC_COMPILE_STDCXX_0X -- sets ac_cv_cxx_compile_cxx0x_native/_cxx/_gxx and defines HAVE_STDCXX_0X when any of them is yes.
+AC_DEFUN([AC_COMPILE_STDCXX_0X], [
+  AC_CACHE_CHECK(if compiler supports C++0x features without additional flags,
+  ac_cv_cxx_compile_cxx0x_native,
+  [AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  AC_TRY_COMPILE([
+  #include <unordered_map>
+  #include <unordered_set>
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;
+    ],,
+  ac_cv_cxx_compile_cxx0x_native=yes, ac_cv_cxx_compile_cxx0x_native=no)
+  AC_LANG_RESTORE
+  ])
+
+  AC_CACHE_CHECK(if compiler supports C++0x features with -std=c++0x,
+  ac_cv_cxx_compile_cxx0x_cxx,
+  [AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  ac_save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -std=c++0x"
+  AC_TRY_COMPILE([
+  #include <unordered_map>
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;],,
+  ac_cv_cxx_compile_cxx0x_cxx=yes, ac_cv_cxx_compile_cxx0x_cxx=no)
+  CXXFLAGS="$ac_save_CXXFLAGS"
+  AC_LANG_RESTORE
+  ])
+
+  AC_CACHE_CHECK(if compiler supports C++0x features with -std=gnu++0x,
+  ac_cv_cxx_compile_cxx0x_gxx,
+  [AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  ac_save_CXXFLAGS="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -std=gnu++0x"
+  AC_TRY_COMPILE([
+  #include <unordered_map>
+  template <typename T>
+    struct check
+    {
+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
+    };
+
+    typedef check<check<bool>> right_angle_brackets;
+
+    int a;
+    decltype(a) b;],,
+  ac_cv_cxx_compile_cxx0x_gxx=yes, ac_cv_cxx_compile_cxx0x_gxx=no)
+  CXXFLAGS="$ac_save_CXXFLAGS"
+  AC_LANG_RESTORE
+  ])
+
+  if test "$ac_cv_cxx_compile_cxx0x_native" = yes ||
+     test "$ac_cv_cxx_compile_cxx0x_cxx" = yes ||
+     test "$ac_cv_cxx_compile_cxx0x_gxx" = yes; then
+    AC_DEFINE(HAVE_STDCXX_0X,,[Define if g++ supports C++0x features. ])
+  fi
+])
+
 dnl By default, many hosts won't let programs access large files;
 dnl one must use special compiler options to get large-file access to work.
 dnl For more details about this brain damage please see:
--- a/configure.ac
+++ b/configure.ac
@@ -1,16 +1,16 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT(Makefile.am)
+AC_INIT
+AC_CONFIG_SRCDIR([Makefile.am])
 AM_INIT_AUTOMAKE(cmph, 1.0)
-AM_CONFIG_HEADER(config.h)
+AC_CONFIG_HEADERS([config.h])
 AC_CONFIG_MACRO_DIR([m4])

 dnl Checks for programs.
 AC_PROG_AWK
 AC_PROG_CC
-AC_PROG_CXX
 AC_PROG_INSTALL
 AC_PROG_LN_S
-AC_PROG_LIBTOOL
+LT_INIT
 AC_SYS_EXTRA_LARGEFILE
 if test "x$ac_cv_sys_largefile_CFLAGS" = "xno" ; then
 	ac_cv_sys_largefile_CFLAGS=""
@@ -25,17 +25,32 @@ CFLAGS="$CFLAGS $ac_cv_sys_largefile_CFLAGS"
 LDFLAGS="$LDFLAGS $ac_cv_sys_largefile_LDFLAGS"
 LIBS="$LIBS $ac_cv_sys_largefile_LIBS"

-
 dnl Checks for headers
 AC_CHECK_HEADERS([getopt.h math.h])

 dnl Checks for libraries.
-AC_CHECK_LIBM  
+LT_LIB_M  
 LDFLAGS="$LIBM $LDFLAGS"
 CFLAGS="-Wall -Werror"

-dnl Checks for library functions.
+AC_PROG_CXX
+AC_ENABLE_CXXMPH
+if test "x$cxxmph" = xtrue; then
+  AC_COMPILE_STDCXX_0X
+  if test "x$ac_cv_cxx_compile_cxx0x_native" = xno; then
+    if test "x$ac_cv_cxx_compile_cxx0x_cxx" = xyes; then
+      CXXFLAGS="$CXXFLAGS -std=c++0x"
+    elif test "x$ac_cv_cxx_compile_cxx0x_gxx" = xyes; then
+      CXXFLAGS="$CXXFLAGS -std=gnu++0x"
+    else
+      AC_MSG_ERROR([cxxmph demands a working c++0x compiler.])
+    fi
+  fi
+  AC_SUBST([CXXMPH], [cxxmph])
+fi

 AC_CHECK_SPOON
-dnl AC_OUTPUT(Makefile tests/Makefile samples/Makefile)
-AC_OUTPUT(Makefile src/Makefile tests/Makefile examples/Makefile man/Makefile cmph.pc)
+dnl Register every generated file first. Autoconf expects the output macro to
+dnl be invoked exactly once, as the last statement of configure.ac.
+AC_CONFIG_FILES([Makefile src/Makefile cxxmph/Makefile tests/Makefile examples/Makefile man/Makefile cmph.pc])
+AC_OUTPUT
--- a/cxxmph/Makefile.am
+++ b/cxxmph/Makefile.am
@@ -0,0 +1,32 @@
+TESTS = $(check_PROGRAMS)
+check_PROGRAMS = mph_bits_test hollow_iterator_test mph_map_test mph_index_test trigraph_test
+noinst_PROGRAMS = bm_index bm_map
+bin_PROGRAMS = cxxmph
+lib_LTLIBRARIES = libcxxmph.la
+libcxxmph_la_SOURCES = MurmurHash3.h MurmurHash3.cpp trigraph.h trigraph.cc mph_index.h mph_index.cc seeded_hash.h stringpiece.h benchmark.h benchmark.cc mph_bits.h mph_bits.cc
+libcxxmph_la_LDFLAGS = -version-info 0:0:0
+cxxmph_includedir = $(includedir)/cxxmph/
+cxxmph_include_HEADERS = mph_map.h mph_index.h MurmurHash3.h trigraph.h seeded_hash.h stringpiece.h hollow_iterator.h
+
+mph_map_test_LDADD   = libcxxmph.la
+mph_map_test_SOURCES = mph_map_test.cc
+
+mph_index_test_LDADD   = libcxxmph.la
+mph_index_test_SOURCES = mph_index_test.cc
+
+bm_index_LDADD = libcxxmph.la -lcmph
+bm_index_SOURCES = bm_common.cc bm_index.cc 
+
+trigraph_test_LDADD   = libcxxmph.la
+trigraph_test_SOURCES = trigraph_test.cc
+
+bm_map_LDADD = libcxxmph.la
+bm_map_SOURCES = bm_common.cc bm_map.cc
+
+cxxmph_LDADD   = libcxxmph.la
+cxxmph_SOURCES = cxxmph.cc
+
+hollow_iterator_test_SOURCES = hollow_iterator_test.cc
+mph_bits_test_SOURCES = mph_bits_test.cc
+mph_bits_test_LDADD   = libcxxmph.la
+
--- a/cxxmph/MurmurHash3.cpp
+++ b/cxxmph/MurmurHash3.cpp
@@ -0,0 +1,335 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE inline __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+  return p[i];
+}
+
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  uint32_t c1 = 0xcc9e2d51;
+  uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix(h1);
+
+  *(uint32_t*)out = h1;
+} 
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  uint32_t c1 = 0x239b961b; 
+  uint32_t c2 = 0xab0e9789;
+  uint32_t c3 = 0x38b34ae5; 
+  uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+  h3 = fmix(h3);
+  h4 = fmix(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= uint64_t(tail[14]) << 48;
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
--- a/cxxmph/MurmurHash3.h
+++ b/cxxmph/MurmurHash3.h
@@ -0,0 +1,37 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
--- a/cxxmph/benchmark.cc
+++ b/cxxmph/benchmark.cc
@@ -0,0 +1,142 @@
+#include "benchmark.h"
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+using std::cerr;
+using std::cout;
+using std::endl;
+using std::setfill;
+using std::setw;
+using std::string;
+using std::ostringstream;
+using std::vector;
+
+namespace {
+
+/* Subtract the `struct timeval' values X and Y (i.e. X - Y),
+   storing the result in RESULT.  Y is adjusted in place by the carry step.
+   Return 1 if the difference is negative, otherwise 0.  */
+int timeval_subtract ( 
+    struct timeval *result, struct timeval *x, struct timeval* y) {
+  /* Perform the carry for the later subtraction by updating y. */
+  if (x->tv_usec < y->tv_usec) {
+    int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
+    y->tv_usec -= 1000000 * nsec;
+    y->tv_sec += nsec;
+  }
+  if (x->tv_usec - y->tv_usec > 1000000) {
+    int nsec = (x->tv_usec - y->tv_usec) / 1000000;
+    y->tv_usec += 1000000 * nsec;
+    y->tv_sec -= nsec;
+  }
+
+  /* Compute the difference field by field.
+     After the carry above, x->tv_usec - y->tv_usec is not negative. */
+  result->tv_sec = x->tv_sec - y->tv_sec;
+  result->tv_usec = x->tv_usec - y->tv_usec;
+
+  /* Return 1 if result is negative. */
+  return x->tv_sec < y->tv_sec;
+}
+
+// Formats tv as "<sec>.<usec>": seconds space-padded to 3, usec zero-padded to 6.
+string timeval_to_string(timeval tv) {
+  ostringstream formatted;
+  formatted << setfill(' ') << setw(3) << tv.tv_sec << '.'
+            << setfill('0') << setw(6) << tv.tv_usec;
+  return formatted.str();
+}
+
+struct rusage getrusage_or_die() {
+  // Snapshot of this process' resource usage; any failure is fatal.
+  struct rusage usage;
+  if (getrusage(RUSAGE_SELF, &usage) != 0) {
+    cerr << "rusage failed: " << strerror(errno) << endl;
+    exit(-1);
+  }
+  return usage;
+}
+
+struct timeval gettimeofday_or_die() {
+  // Current wall-clock time; any failure is fatal.
+  struct timeval now;
+  if (gettimeofday(&now, NULL) != 0) {
+    cerr << "gettimeofday failed: " << strerror(errno) << endl;
+    exit(-1);
+  }
+  return now;
+}
+
+#ifdef HAVE_CXA_DEMANGLE  // NOTE(review): this branch also needs <cxxabi.h>
+// Returns the demangled form of `name`, or `name` itself if demangling fails.
+string demangle(const string& name) {
+  int status = 0;
+  // A NULL output buffer makes __cxa_demangle malloc() the result itself.
+  char* res = abi::__cxa_demangle(name.c_str(), NULL, NULL, &status);
+  if (status != 0 || res == NULL) return name;
+  string demangled(res);
+  free(res);
+  return demangled;
+}
+#else
+string demangle(const string& name) { return name; }
+#endif
+static vector<cxxmph::Benchmark*> g_benchmarks;
+
+}  // anonymous namespace
+
+namespace cxxmph {
+
+/* static */ void Benchmark::Register(Benchmark* bm) {
+  // Unnamed benchmarks are labelled with their (demangled) dynamic type name.
+  if (bm->name().empty()) {
+    bm->set_name(demangle(typeid(*bm).name()));
+  }
+  g_benchmarks.push_back(bm);  // raw pointer; RunAll() takes ownership of it
+}
+
+/* static */ void Benchmark::RunAll() {  // runs, then frees, each benchmark
+  for (size_t i = 0; i < g_benchmarks.size(); ++i) {
+    std::auto_ptr<Benchmark> bm(g_benchmarks[i]);  // deleted at end of iteration
+    if (!bm->SetUp()) {
+      cerr << "Set up phase for benchmark " << bm->name() << " failed." << endl;
+      continue;
+    }
+    bm->MeasureRun();
+    bm->TearDown(); 
+  }
+  g_benchmarks.clear();  // entries were deleted above; avoid a double delete
+}
+
+void Benchmark::MeasureRun() {
+  struct timeval walltime_begin = gettimeofday_or_die();
+  struct rusage begin = getrusage_or_die();
+  Run();
+  struct rusage end = getrusage_or_die();
+  struct timeval walltime_end = gettimeofday_or_die();
+  // Elapsed user CPU, system CPU and wall-clock time around the Run() call.
+  struct timeval utime;
+  timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime);
+  struct timeval stime;
+  timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime);
+  struct timeval wtime;
+  timeval_subtract(&wtime, &walltime_end, &walltime_begin);
+  // Print the report for this benchmark to stdout.
+  cout << "Benchmark: " << name_ << endl;
+  cout << "CPU User time  : " << timeval_to_string(utime) << endl;
+  cout << "CPU System time: " << timeval_to_string(stime) << endl;
+  cout << "Wall clock time: " << timeval_to_string(wtime) << endl;
+  cout << endl;
+}
+
+}  // namespace cxxmph
--- a/cxxmph/benchmark.h
+++ b/cxxmph/benchmark.h
@@ -0,0 +1,32 @@
+#ifndef __CXXMPH_BENCHMARK_H__
+#define __CXXMPH_BENCHMARK_H__
+
+#include <string>
+#include <typeinfo>
+
+namespace cxxmph {
+
+class Benchmark {  // Base class: subclasses implement Run(); RunAll() drives them.
+ public:
+  Benchmark() {}
+  virtual ~Benchmark() {}
+
+  const std::string& name() const { return name_; }
+  void set_name(const std::string& name) { name_ = name; }
+
+  static void Register(Benchmark* bm);  // queues bm for RunAll()
+  static void RunAll();                 // runs, then deletes, every queued benchmark
+
+ protected:
+  virtual bool SetUp() { return true; }     // false => the benchmark is skipped
+  virtual void Run() = 0;                   // the code being timed
+  virtual bool TearDown() { return true; }  // result is currently ignored
+
+ private:
+  std::string name_;
+  void MeasureRun();  // times Run() and prints the report
+};
+
+}  // namespace cxxmph
+
+#endif
--- a/cxxmph/bm_common.cc
+++ b/cxxmph/bm_common.cc
@@ -0,0 +1,71 @@
+#include <cmath>
+#include <fstream>
+#include <limits>
+#include <iostream>
+#include <set>
+
+#include "bm_common.h"
+
+using std::cerr;
+using std::endl;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace cxxmph {
+  
+bool UrlsBenchmark::SetUp() {
+  // Loads one key per line into urls_; fails on a missing file or repeated keys.
+  std::ifstream input(urls_file_.c_str());
+  if (!input.is_open()) {
+    cerr << "Failed to open urls file " << urls_file_ << endl;
+    return false;
+  }
+  vector<string> lines;
+  for (string line; std::getline(input, line); ) lines.push_back(line);
+  const set<string> distinct(lines.begin(), lines.end());
+  if (distinct.size() != lines.size()) {
+    cerr << "Input file has repeated keys." << endl;
+    return false;
+  }
+  urls_.swap(lines);
+  return true;
+}
+
+bool SearchUrlsBenchmark::SetUp() {  // builds the nsearches_ lookup keys in random_
+  if (!UrlsBenchmark::SetUp() || (nsearches_ > 0 && urls_.empty())) return false;
+  int32_t miss_ratio_int32 = std::numeric_limits<int32_t>::max() * miss_ratio_;
+  forced_miss_urls_.resize(nsearches_);
+  random_.resize(nsearches_);
+  for (uint32_t i = 0; i < nsearches_; ++i) {
+    random_[i] = urls_[random() % urls_.size()];  // urls_ is non-empty here
+    if (random() < miss_ratio_int32) {  // swap in a key with the ".force_miss" suffix
+      forced_miss_urls_[i] = random_[i].as_string() + ".force_miss";
+      random_[i] = forced_miss_urls_[i];
+    }
+  }
+  return true;
+}
+
+bool Uint64Benchmark::SetUp() {
+  // Appends count_ distinct values drawn from random() to values_.
+  set<uint64_t> seen;
+  for (int n = 0; n < count_; ++n) {
+    uint64_t candidate;
+    do { candidate = random(); } while (!seen.insert(candidate).second);
+    values_.push_back(candidate);
+  }
+  return true;
+}
+
+bool SearchUint64Benchmark::SetUp() {
+  if (!Uint64Benchmark::SetUp()) return false;
+  if (nsearches_ > 0 && values_.empty()) return false;  // would be "% 0" below
+  random_.resize(nsearches_);
+  for (uint32_t i = 0; i < nsearches_; ++i) {
+    random_[i] = values_[random() % values_.size()];  // always a stored value
+  }
+  return true;
+}
+
+}  // namespace cxxmph 
--- a/cxxmph/bm_common.h
+++ b/cxxmph/bm_common.h
@@ -0,0 +1,69 @@
+#ifndef __CXXMPH_BM_COMMON_H__
+#define __CXXMPH_BM_COMMON_H__
+
+#include "stringpiece.h"
+
+#include <string>
+#include <vector>
+#include <unordered_map>  // std::hash
+#include "MurmurHash3.h"
+
+#include "benchmark.h"
+
+namespace std {
+template <> struct hash<cxxmph::StringPiece> {  // lets StringPiece key std::unordered_map
+  uint32_t operator()(const cxxmph::StringPiece& k) const {
+    uint32_t out;
+    MurmurHash3_x86_32(k.data(), k.length(), 1, &out);  // seed is fixed to 1
+    return out;
+  }
+};
+}  // namespace std
+
+namespace cxxmph {
+
+class UrlsBenchmark : public Benchmark {  // benchmark fixture backed by a file of keys
+ public:
+  UrlsBenchmark(const std::string& urls_file) : urls_file_(urls_file) { }
+ protected:
+  virtual bool SetUp();  // reads urls_file_ into urls_; false on I/O error or duplicates
+  const std::string urls_file_;
+  std::vector<std::string> urls_;  // one entry per input line
+};
+
+class SearchUrlsBenchmark : public UrlsBenchmark {
+ public:
+  SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches, float miss_ratio)
+      : UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(miss_ratio) {}
+ protected:
+  virtual bool SetUp();  // fills random_ with nsearches_ keys sampled from urls_
+  const uint32_t nsearches_;  // number of lookups to prepare
+  float miss_ratio_;  // scaled by INT32_MAX and compared with random() to force misses
+  std::vector<std::string> forced_miss_urls_;  // storage backing the ".force_miss" keys
+  std::vector<StringPiece> random_;  // the lookup keys, in query order
+};
+
+class Uint64Benchmark : public Benchmark {
+ public:
+  Uint64Benchmark(uint32_t count) : count_(count) { }
+  virtual void Run() {}  // empty here; derived benchmarks may override it
+ protected:
+  virtual bool SetUp();  // appends count_ distinct random() values to values_
+  const uint32_t count_;
+  std::vector<uint64_t> values_;
+};
+
+class SearchUint64Benchmark : public Uint64Benchmark {
+ public:
+  SearchUint64Benchmark(uint32_t count, uint32_t nsearches)
+      : Uint64Benchmark(count), nsearches_(nsearches) { }
+  virtual void Run() {};  // empty here; derived benchmarks may override it
+ protected:
+  virtual bool SetUp();  // fills random_ with nsearches_ keys sampled from values_
+  const uint32_t nsearches_;
+  std::vector<uint64_t> random_;  // the lookup keys, in query order
+};
+
+}  // namespace cxxmph
+
+#endif  // __CXXMPH_BM_COMMON_H__
--- a/cxxmph/bm_index.cc
+++ b/cxxmph/bm_index.cc
@@ -0,0 +1,143 @@
+#include <cmph.h>
+
+#include <cstdio>
+#include <set>
+#include <string>
+#include <unordered_map>
+
+#include "bm_common.h"
+#include "stringpiece.h"
+#include "mph_index.h"
+
+using namespace cxxmph;
+
+using std::string;
+using std::unordered_map;
+
+class BM_MPHIndexCreate : public UrlsBenchmark {  // times building a SimpleMPHIndex
+ public:
+  BM_MPHIndexCreate(const std::string& urls_file)
+      : UrlsBenchmark(urls_file) { }
+ protected:
+  virtual void Run() {
+    SimpleMPHIndex<StringPiece> built;  // discarded when Run() returns
+    built.Reset(urls_.begin(), urls_.end(), urls_.size());
+  }
+};
+
+class BM_STLIndexCreate : public UrlsBenchmark {  // times filling an unordered_map
+ public:
+  BM_STLIndexCreate(const std::string& urls_file)
+      : UrlsBenchmark(urls_file) { }
+ protected:
+  virtual void Run() {
+    unordered_map<StringPiece, uint32_t> table;
+    // Maps every URL to its position in urls_.
+    for (int pos = 0; pos < urls_.size(); ++pos) {
+      table.insert(make_pair(urls_[pos], pos));
+    }
+  }
+};
+
+class BM_MPHIndexSearch : public SearchUrlsBenchmark {  // times SimpleMPHIndex lookups
+ public:
+  BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
+      : SearchUrlsBenchmark(urls_file, nsearches, 0) { }
+  virtual void Run() {
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      auto idx = index_.index(*it);
+      // Collision check to be fair with STL (disabled: kept commented out below)
+      // if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
+    }
+  }
+ protected:
+  virtual bool SetUp () {
+   if (!SearchUrlsBenchmark::SetUp()) return false;
+   index_.Reset(urls_.begin(), urls_.end(), urls_.size());
+   return true;
+  }
+  SimpleMPHIndex<StringPiece> index_;  // built in SetUp(), queried in Run()
+};
+
+class BM_CmphIndexSearch : public SearchUrlsBenchmark {  // times C cmph (BDZ) lookups
+ public:
+  BM_CmphIndexSearch(const std::string& urls_file, int nsearches)
+      : SearchUrlsBenchmark(urls_file, nsearches, 0), index_(NULL) { }
+  ~BM_CmphIndexSearch() { if (index_) cmph_destroy(index_); }
+  virtual void Run() {
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      auto idx = cmph_search(index_, it->data(), it->length());
+      // Collision check to be fair with STL
+      if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
+    }
+  }
+ protected:
+  // Builds the BDZ function from urls_file_. The file, adapter and config are
+  // only needed during construction and are released on every exit path.
+  virtual bool SetUp() {
+    if (!SearchUrlsBenchmark::SetUp()) {
+      cerr << "Parent class setup failed." << endl;
+      return false;
+    }
+    FILE* f = fopen(urls_file_.c_str(), "r");
+    if (!f) {
+      cerr << "Faied to open " << urls_file_ << endl;
+      return false;
+    }
+    cmph_io_adapter_t* source = cmph_io_nlfile_adapter(f);
+    cmph_config_t* config = source ? cmph_config_new(source) : NULL;
+    cmph_t* mphf = NULL;
+    if (!source) {
+      cerr << "Faied to create io adapter for " << urls_file_ << endl;
+    } else if (!config) {
+      cerr << "Failed to create config" << endl;
+    } else {
+      cmph_config_set_algo(config, CMPH_BDZ);
+      mphf = cmph_new(config);
+      if (!mphf) cerr << "Failed to create mphf." << endl;
+    }
+    if (config) cmph_config_destroy(config);
+    if (source) cmph_io_nlfile_adapter_destroy(source);
+    fclose(f);
+    if (!mphf) return false;
+    if (index_) cmph_destroy(index_);  // SetUp() may run more than once
+    index_ = mphf;
+    return true;
+  }
+  // Owned by this object; NULL until SetUp() succeeds.
+  cmph_t* index_;
+};
+    
+
+class BM_STLIndexSearch : public SearchUrlsBenchmark {  // times unordered_map lookups
+ public:
+  BM_STLIndexSearch(const std::string& urls_file, int nsearches)
+      : SearchUrlsBenchmark(urls_file, nsearches, 0) { }
+  virtual void Run() {
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      auto idx = index_.find(*it);  // the iterator is not used afterwards
+    }
+  }
+ protected:
+  virtual bool SetUp () {
+   if (!SearchUrlsBenchmark::SetUp()) return false;
+   unordered_map<StringPiece, uint32_t> index;
+   int idx = 0;
+   for (auto it = urls_.begin(); it != urls_.end(); ++it) {
+     index.insert(make_pair(*it, idx++));  // URL -> position in urls_
+   }
+   index.swap(index_);  // replaces (does not merge with) any previous contents
+   return true;
+  }
+  unordered_map<StringPiece, uint32_t> index_;
+};
+
+int main(int argc, char** argv) {  // registers the index benchmarks, then runs them
+  Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
+  Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
+  Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 10*1000*1000));
+  Benchmark::Register(new BM_STLIndexSearch("URLS100k", 10*1000*1000));
+  Benchmark::Register(new BM_CmphIndexSearch("URLS100k", 10*1000*1000));
+  Benchmark::RunAll();  // benchmarks run in registration order
+  return 0;
+}
--- a/cxxmph/bm_map.cc
+++ b/cxxmph/bm_map.cc
@@ -0,0 +1,101 @@
+#include <string>
+#include <tr1/unordered_map>
+
+#include "bm_common.h"
+#include "mph_map.h"
+
+using cxxmph::mph_map;
+using std::string;
+using std::unordered_map;
+
+namespace cxxmph {
+
+template<class MapType, class T>
+const T* myfind(const MapType& mymap, const T& k) {
+  // Returns a pointer to the value stored under k,
+  // or NULL when k is not present in mymap.
+  auto found = mymap.find(k);
+  return found == mymap.end() ? NULL : &found->second;
+}
+
+template <class MapType>
+class BM_CreateUrls : public UrlsBenchmark {  // times inserting every URL into MapType
+ public:
+  BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
+  virtual void Run() {
+    MapType filled;  // each URL is stored as both key and value
+    for (size_t i = 0; i < urls_.size(); ++i) {
+      filled[urls_[i]] = urls_[i];
+    }
+  }
+};
+
+template <class MapType>
+class BM_SearchUrls : public SearchUrlsBenchmark {  // times URL lookups in MapType
+ public:
+  BM_SearchUrls(const std::string& urls_file, int nsearches, float miss_ratio) 
+      : SearchUrlsBenchmark(urls_file, nsearches, miss_ratio) { }
+  virtual void Run() {
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      auto v = myfind(mymap_, *it);
+      assert(it->ends_with(".force_miss") ^ v != NULL);  // '!=' binds tighter than '^'
+      assert(!v || *v == *it);  // a hit must return the key itself
+    }
+  }
+ protected:
+  virtual bool SetUp() {
+    if (!SearchUrlsBenchmark::SetUp()) return false;
+    for (auto it = urls_.begin(); it != urls_.end(); ++it) {
+      mymap_[*it] = *it;  // each URL is stored as both key and value
+    }
+    mymap_.rehash(mymap_.bucket_count());  // NOTE(review): purpose not shown here - confirm
+    fprintf(stderr, "Occupation: %f\n", static_cast<float>(mymap_.size())/mymap_.bucket_count());
+    return true;
+  }
+  MapType mymap_;
+};
+
+template <class MapType>
+class BM_SearchUint64 : public SearchUint64Benchmark {  // times uint64 key lookups
+ public:
+  BM_SearchUint64() : SearchUint64Benchmark(100000, 10*1000*1000) { }
+  virtual bool SetUp() {
+    if (!SearchUint64Benchmark::SetUp()) return false;
+    for (size_t i = 0; i < values_.size(); ++i) mymap_[values_[i]] = values_[i];
+    mymap_.rehash(mymap_.bucket_count());
+    // Double check if everything is all right
+    for (size_t i = 0; i < values_.size(); ++i) {
+      if (mymap_[values_[i]] != values_[i]) return false;
+    }
+    return true;
+  }
+  virtual void Run() {
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      auto v = myfind(mymap_, *it);
+      if (v == NULL || *v != *it) {  // a missing key used to dereference NULL
+        fprintf(stderr, "Looked for %llu got %llu\n",
+                static_cast<unsigned long long>(*it),
+                static_cast<unsigned long long>(v ? *v : 0));
+        exit(-1);
+      }
+    }
+  }
+  MapType mymap_;
+};
+
+}  // namespace cxxmph
+
+using namespace cxxmph;
+
+int main(int argc, char** argv) {  // registers the map benchmarks, then runs them
+  srandom(4);  // fixed seed for the random() calls made by the benchmark fixtures
+  Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
+  Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
+  Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
+  Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
+  Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
+  Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
+  Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
+  Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
+  Benchmark::RunAll();
+}
--- a/cxxmph/cxxmph.cc
+++ b/cxxmph/cxxmph.cc
@@ -0,0 +1,70 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+// Author: davi@google.com (Davi Reis)
+
+#include <getopt.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "mph_map.h"
+#include "config.h"
+
+using std::cerr;
+using std::cout;
+using std::endl;
+using std::getline;
+using std::ifstream;
+using std::string;
+using std::vector;
+
+using cxxmph::mph_map;
+
+void usage(const char* prg) {  // one-line synopsis, written to stderr
+  cerr << "usage: " << prg << " [-v] [-h] [-V] <keys.txt>" << endl;
+}
+void usage_long(const char* prg) {  // synopsis plus one line per option, on stderr
+  usage(prg);
+  cerr << "   -h\t print this help message" << endl;
+  cerr << "   -V\t print version number and exit" << endl;
+  cerr << "   -v\t increase verbosity (may be used multiple times)" << endl;
+}
+
+int main(int argc, char** argv) {
+  // Loads the keys file (one key per line) into an mph_map and prints it.
+  int verbosity = 0;
+  while (1) {
+    int ch = getopt(argc, argv, "hvV");  // int: an unsigned char never equals -1
+    if (ch == -1) break;
+    switch (ch) {
+      case 'h':
+        usage_long(argv[0]);
+        return 0;
+      case 'V':
+        std::cout << VERSION << std::endl;
+        return 0;
+      case 'v':
+        ++verbosity;
+        break;
+    }
+  }
+  if (optind != argc - 1) {
+    usage(argv[0]);
+    return 1;
+  }
+  vector<string> keys;
+  ifstream f(argv[optind]);
+  if (!f) { cerr << "Failed to open " << argv[optind] << endl; return 1; }
+  string buffer;
+  // Test the stream itself: looping on !eof() never ends if the open failed.
+  while (getline(f, buffer)) keys.push_back(buffer);
+  mph_map<string, string> table;
+  for (size_t i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i];
+  mph_map<string, string>::const_iterator it = table.begin();
+  mph_map<string, string>::const_iterator end = table.end();
+  for (int i = 0; it != end; ++it, ++i) {
+    cout << i << ": " << it->first
+         <<" -> " << it->second << endl;
+  }
+}
--- a/cxxmph/hollow_iterator.h
+++ b/cxxmph/hollow_iterator.h
@@ -0,0 +1,71 @@
+#ifndef __CXXMPH_HOLLOW_ITERATOR_H__
+#define __CXXMPH_HOLLOW_ITERATOR_H__
+
+#include <vector>
+
+namespace cxxmph {
+
+template <typename container_type, typename presence_type, typename iterator_type>
+struct hollow_iterator_base  // forward iterator over *c_ that skips slots unset in *p_
+    : public std::iterator<std::forward_iterator_tag,
+                           typename container_type::value_type> {
+  typedef presence_type presence;
+  typedef container_type container;
+  typedef iterator_type iterator;
+  typedef hollow_iterator_base<container, presence, iterator>& self_reference;
+  typedef typename iterator::reference reference;
+  typedef typename iterator::pointer pointer;
+
+  hollow_iterator_base(container* c, presence* p, iterator it)
+      : c_(c), p_(p), it_(it) { if (c_) find_present(); }
+  self_reference operator++() {
+    ++it_; find_present();
+    return *this;  // was missing: flowing off a non-void function is undefined
+  }
+  reference operator*() { return *it_;  }
+  pointer operator->() { return &(*it_); }
+  // TODO find syntax to make this less permissible at compile time
+  template <class T>
+  bool operator==(const T& rhs) { return rhs.it_ == this->it_; }
+  template <class T>
+  bool operator!=(const T& rhs) { return rhs.it_ != this->it_; }
+
+ public:  // TODO find syntax to make this friend of const iterator
+  void find_present() {  // advance it_ to the next present slot, or to c_->end()
+    while (it_ != c_->end() && !((*p_)[it_-c_->begin()])) ++it_;
+  }
+  container* c_;
+  presence* p_;
+  iterator it_;
+};
+
+template <typename container_type>
+struct hollow_iterator : public hollow_iterator_base<  // mutable flavour, vector<bool> presence
+    container_type, std::vector<bool>, typename container_type::iterator> {
+  typedef hollow_iterator_base<
+      container_type, std::vector<bool>, typename container_type::iterator> parent_class;
+  hollow_iterator() : parent_class(NULL, NULL, typename container_type::iterator())  { }
+  hollow_iterator(typename parent_class::container* c,
+                  typename parent_class::presence* p,
+                  typename parent_class::iterator it)
+     : parent_class(c, p, it) { }  // the base constructor skips to the first present slot
+};
+
+template <typename container_type>
+struct hollow_const_iterator : public hollow_iterator_base<  // const flavour
+    const container_type, const std::vector<bool>, typename container_type::const_iterator> {
+  typedef hollow_iterator_base<
+      const container_type, const std::vector<bool>, typename container_type::const_iterator> parent_class;
+  typedef hollow_const_iterator<container_type> self_type;
+  typedef hollow_iterator<container_type> non_const_type;  // source of the converting constructor
+  hollow_const_iterator(non_const_type rhs) : parent_class(rhs.c_, rhs.p_, typename container_type::const_iterator(rhs.it_)) { }
+  hollow_const_iterator() : parent_class(NULL, NULL, typename container_type::iterator())  { }
+  hollow_const_iterator(const typename parent_class::container* c,
+                        const typename parent_class::presence* p,
+                        typename parent_class::iterator it)  // parent_class::iterator is the const_iterator
+     : parent_class(c, p, it) { }
+};
+
+}  // namespace cxxmph
+
+#endif  // __CXXMPH_HOLLOW_ITERATOR_H__
--- a/cxxmph/hollow_iterator_test.cc
+++ b/cxxmph/hollow_iterator_test.cc
@@ -0,0 +1,38 @@
+#include <cstdlib>
+#include <cstdio>
+#include <vector>
+
+#include "hollow_iterator.h"
+
+using std::vector;
+using cxxmph::hollow_iterator;
+using cxxmph::hollow_const_iterator;
+
+int main(int argc, char** argv) {  // exits with -1 on the first failed check
+  vector<int> v;
+  vector<bool> p;
+  for (int i = 0; i < 100; ++i) {
+    v.push_back(i);
+    p.push_back(i % 2 == 0);  // only even positions are marked present
+  }
+  auto begin = hollow_iterator<vector<int>>(&v, &p, v.begin());
+  auto end = hollow_iterator<vector<int>>(&v, &p, v.end());
+  for (auto it = begin; it != end; ++it) {
+    if (((*it) % 2) != 0) exit(-1);  // an odd value means an absent slot was visited
+  }
+  hollow_const_iterator<vector<int>> const_begin(begin);
+  hollow_const_iterator<vector<int>> const_end(end);
+  for (auto it = const_begin; it != const_end; ++it) {
+    if (((*it) % 2) != 0) exit(-1);
+  }
+  vector<int>::iterator vit1 = v.begin();
+  vector<int>::const_iterator vit2 = v.begin();
+  if (vit1 != vit2) exit(-1);
+  auto it1 = hollow_iterator<vector<int>>(&v, &p, v.begin());
+  auto it2 = hollow_const_iterator<vector<int>>(&v, &p, v.begin());
+  if (it1 != it2) exit(-1);  // mixed const / non-const comparison
+
+  hollow_iterator<vector<int>> default_constructed;
+  default_constructed = hollow_iterator<vector<int>>(&v, &p, v.begin());
+}
+
--- a/cxxmph/mph_bits.cc
+++ b/cxxmph/mph_bits.cc
@@ -0,0 +1,7 @@
+#include "mph_bits.h"
+
+namespace cxxmph {
+
+const uint8_t dynamic_2bitset::vmask[] = { 0xfc, 0xf3, 0xcf, 0x3f};  // vmask[j] has zeros only in the j-th 2-bit field
+
+}
--- a/cxxmph/mph_bits.h
+++ b/cxxmph/mph_bits.h
@@ -0,0 +1,75 @@
+#ifndef __CXXMPH_MPH_BITS_H__
+#define __CXXMPH_MPH_BITS_H__
+
+#include <stdint.h>  // for uint32_t and friends
+
+#include <array>
+#include <cassert>
+#include <climits>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <limits>
+#include <vector>
+#include <utility>
+
+namespace cxxmph {
+
+class dynamic_2bitset {  // packed array of 2-bit values, four per byte
+ public:
+  dynamic_2bitset() : size_(0), fill_(false)  {}
+  dynamic_2bitset(uint32_t size, bool fill = false)
+      : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {
+  }
+
+  const uint8_t operator[](uint32_t i) const { return get(i); }
+  const uint8_t get(uint32_t i) const { 
+    assert(i < size());
+    assert((i >> 2) < data_.size());
+    return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
+  }
+  uint8_t set(uint32_t i, uint8_t v) {  // stores v (0..3) at position i and returns it
+    assert(v <= 3);
+    assert((i >> 2) < data_.size());
+    data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];  // field := 11
+    data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);  // field := v
+    assert(get(i) == v);
+    return v;
+  }
+  void resize(uint32_t size) {
+    size_ = size;
+    data_.resize((size >> 2) + ((size & 3) != 0), fill_*ones());  // round up, as the ctor does
+  }
+  void swap(dynamic_2bitset& other) {
+    std::swap(other.size_, size_);
+    std::swap(other.fill_, fill_);
+    other.data_.swap(data_);
+  }
+  void clear() { data_.clear(); size_ = 0; }
+  uint32_t size() const { return size_; }  // number of 2-bit values, not bytes
+  static const uint8_t vmask[];
+  const std::vector<uint8_t>& data() const { return data_; }
+ private:
+  uint32_t size_;
+  bool fill_;
+  std::vector<uint8_t> data_;
+  const uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
+};
+
+static uint32_t nextpoweroftwo(uint32_t k) {  // smallest power of two >= k; 1 for k == 0
+  if (k == 0) return 1;
+  uint32_t smeared = k - 1;
+  for (int shift = 1; shift < sizeof(uint32_t)*CHAR_BIT; shift <<= 1) smeared |= smeared >> shift;
+  return smeared + 1;
+}
+
+// Interesting bit tricks that might end up here:
+// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+// Fast a % (k*2^t)
+// http://www.azillionmonkeys.com/qed/adiv.html
+// rank and select:
+// http://vigna.dsi.unimi.it/ftp/papers/Broadword.pdf
+  
+}  // namespace cxxmph
+
+#endif
--- a/cxxmph/mph_bits_test.cc
+++ b/cxxmph/mph_bits_test.cc
@@ -0,0 +1,57 @@
+#include <cstdio>
+#include <cstdlib>
+
+#include "mph_bits.h"
+
+using cxxmph::dynamic_2bitset;
+int main(int argc, char** argv) {  // exits with -1 on the first failed check
+  dynamic_2bitset small(256, true);
+  for (int i = 0; i < small.size(); ++i) small.set(i, i % 4);
+  for (int i = 0; i < small.size(); ++i) {
+    if (small[i] != i % 4) {
+      fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4);
+      exit(-1);
+    }
+  }
+  // A bitset built with fill == true must read back 3 everywhere.
+  int size = 256;
+  dynamic_2bitset bits(size, true /* fill with ones */);
+  for (int i = 0; i < size; ++i) {
+    if (bits[i] != 3)  {
+      fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 3);
+      exit(-1);
+    }
+  }
+  for (int i = 0; i < size; ++i) bits.set(i, 0);  // overwrite all ones with zeros
+  for (int i = 0; i < size; ++i) {
+    if (bits[i] != 0)  {
+      fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 0);
+      exit(-1);
+    }
+  }
+  for (int i = 0; i < size; ++i) bits.set(i, i % 4);  // then store every possible value
+  for (int i = 0; i < size; ++i) {
+    if (bits[i] != i % 4) {
+      fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, i % 4);
+      exit(-1);
+    }
+  }
+  dynamic_2bitset size_corner1(1);  // sizes that do not fill a whole byte
+  if (size_corner1.size() != 1) exit(-1);
+  dynamic_2bitset size_corner2(2);
+  if (size_corner2.size() != 2) exit(-1);
+  (dynamic_2bitset(4, true)).swap(size_corner2);
+  if (size_corner2.size() != 4) exit(-1);
+  for (int i = 0; i < size_corner2.size(); ++i) {
+    if (size_corner2[i] != 3) exit(-1);
+  }
+  size_corner2.clear();
+  if (size_corner2.size() != 0) exit(-1);
+
+  dynamic_2bitset empty;
+  empty.clear();  // clearing an empty bitset must be harmless
+  dynamic_2bitset large(1000, true);
+  empty.swap(large);
+}
+  
+  
--- a/cxxmph/mph_index.cc
+++ b/cxxmph/mph_index.cc
@@ -0,0 +1,205 @@
+#include <limits>
+#include <iostream>
+#include <vector>
+
+using std::cerr;
+using std::endl;
+
+#include "mph_index.h"
+
+using std::vector;
+
+namespace {
+
+static const uint8_t kUnassigned = 3;
+// kBdzLookupIndex[b] = how many of the four 2-bit fields of byte b differ from kUnassigned
+static uint8_t kBdzLookupIndex[] =
+{
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
+2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
+};
+
+}  // anonymous namespace
+
+namespace cxxmph {
+
+MPHIndex::~MPHIndex() {
+  clear();  // frees the rank table
+}
+
+void MPHIndex::clear() {  // currently releases only the rank table
+  delete [] ranktable_;
+  ranktable_ = NULL;
+  ranktable_size_ = 0;
+  // TODO(davi) implement me (no other member is reset here yet)
+}
+
+bool MPHIndex::GenerateQueue(
+    TriGraph* graph, vector<uint32_t>* queue_output) {  // true iff every edge got queued
+  uint32_t queue_head = 0, queue_tail = 0;
+  uint32_t nedges = m_;
+  uint32_t nvertices = n_;
+  // Relies on vector<bool> using 1 bit per element
+  vector<bool> marked_edge(nedges + 1, false);
+  vector<uint32_t> queue(nvertices, 0);
+  for (uint32_t i = 0; i < nedges; ++i) {
+    const TriGraph::Edge& e = graph->edges()[i];
+    if (graph->vertex_degree()[e[0]] == 1 ||
+        graph->vertex_degree()[e[1]] == 1 ||
+        graph->vertex_degree()[e[2]] == 1) {
+      if (!marked_edge[i]) {
+        queue[queue_head++] = i;
+        marked_edge[i] = true;
+      }
+    }
+  }
+  /*
+  for (unsigned int i = 0; i < marked_edge.size(); ++i) {
+    cerr << "vertex with degree " << static_cast<uint32_t>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
+  }
+  for (unsigned int i = 0; i < queue.size(); ++i) {
+    cerr << "vertex " << i << " queued at " << queue[i] << endl;
+  }
+  */
+  // At this point queue head is the number of edges touching at least one
+  // vertex of degree 1.
+  // cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
+  // graph->DebugGraph();
+  while (queue_tail != queue_head) {  // remove queued edges one by one
+    uint32_t current_edge = queue[queue_tail++];
+    graph->RemoveEdge(current_edge);
+    const TriGraph::Edge& e = graph->edges()[current_edge];
+    for (int i = 0; i < 3; ++i) {
+      uint32_t v = e[i];
+      if (graph->vertex_degree()[v] == 1) {  // the removal left v with a single edge
+        uint32_t first_edge = graph->first_edge()[v];
+        if (!marked_edge[first_edge]) {
+          queue[queue_head++] = first_edge;
+          marked_edge[first_edge] = true;
+        }
+      }
+    }
+  }
+  /*
+  for (unsigned int i = 0; i < queue.size(); ++i) {
+    cerr << "vertex " << i << " queued at " << queue[i] << endl;
+  }
+  */
+  int cycles = queue_head - nedges;  // non-zero when some edge was never queued
+  if (cycles == 0) queue.swap(*queue_output);  // the output is only written on success
+  return cycles == 0;
+}
+
+void MPHIndex::Assigning(
+    const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {  // fills g_
+  uint32_t current_edge = 0;
+  vector<bool> marked_vertices(n_ + 1);
+  dynamic_2bitset().swap(g_);
+  // Initialize vector of half nibbles with all bits set.
+  dynamic_2bitset g(n_, true /* set bits to 1 */);
+
+  uint32_t nedges = m_;  // for legibility
+  for (int i = nedges - 1; i + 1 >= 1; --i) {  // walks the queue from last to first
+    current_edge = queue[i];
+    const TriGraph::Edge& e = edges[current_edge];
+    /*
+    cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
+        << get_2bit_value(g_, e[0]) << " "
+        << get_2bit_value(g_, e[1]) << " "
+        << get_2bit_value(g_, e[2]) << " edge " << current_edge  << endl;
+    */
+    if (!marked_vertices[e[0]]) {  // e[0] is still free: it receives this edge's value
+      if (!marked_vertices[e[1]]) {
+        g.set(e[1], kUnassigned);
+        marked_vertices[e[1]] = true;
+      }
+      if (!marked_vertices[e[2]]) {
+        g.set(e[2], kUnassigned);
+	assert(marked_vertices.size() > e[2]);
+        marked_vertices[e[2]] = true;
+      }
+      g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3);
+      marked_vertices[e[0]] = true;
+    } else if (!marked_vertices[e[1]]) {  // otherwise e[1] receives it
+      if (!marked_vertices[e[2]]) {
+        g.set(e[2], kUnassigned);
+        marked_vertices[e[2]] = true;
+      }
+      g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3);
+      marked_vertices[e[1]] = true;
+    } else {  // otherwise e[2] receives it
+      g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3);
+      marked_vertices[e[2]] = true;
+    }
+    /*
+    cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
+        << static_cast<uint32_t>(g[e[0]]) << " "
+        << static_cast<uint32_t>(g[e[1]]) << " "
+        << static_cast<uint32_t>(g[e[2]]) << " " << endl;
+    */
+  }
+  g_.swap(g);  // publish the finished assignment
+}
+
+void MPHIndex::Ranking() {
+  // ranktable_[i] = number of assigned entries among the first i * k_ entries of g_.
+  uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
+  uint32_t size = k_ >> 2U;
+  ranktable_size_ = static_cast<uint32_t>(
+      ceil(n_ / static_cast<double>(k_)));
+  delete [] ranktable_;
+  ranktable_ = NULL;
+  uint32_t* ranktable = new uint32_t[ranktable_size_];
+  memset(ranktable, 0, ranktable_size_*sizeof(uint32_t));
+  uint32_t offset = 0;
+  uint32_t count = 0;
+  // "<" rather than "==": with an empty table the old test never became true.
+  for (uint32_t i = 1; i < ranktable_size_; ++i) {
+    uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
+    // The lookup table is indexed by whole bytes of g_, not by 2-bit values.
+    for (uint32_t j = 0; j < nbytes; ++j) count += kBdzLookupIndex[g_.data()[offset + j]];
+    ranktable[i] = count;
+    offset += nbytes;
+    nbytes_total -= size;
+  }
+  ranktable_ = ranktable;
+}
+
+uint32_t MPHIndex::Rank(uint32_t vertex) const {  // counts assigned entries of g_ before vertex
+  uint32_t index = vertex >> b_;
+  uint32_t base_rank = ranktable_[index];  // precomputed count for the enclosing block
+  uint32_t beg_idx_v = index << b_;
+  uint32_t beg_idx_b = beg_idx_v >> 2;
+  uint32_t end_idx_b = vertex >> 2;
+  while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]];  // whole bytes
+  beg_idx_v = beg_idx_b << 2;
+  // cerr << "beg_idx_v: " << beg_idx_v << endl;
+  // cerr << "base rank: " << base_rank << endl;
+  // cerr << "G: ";
+  // for (unsigned int i = 0; i < n_; ++i) {
+  //  cerr << static_cast<uint32_t>(g_[i]) << " ";
+  // }
+  // cerr << endl;
+  while (beg_idx_v < vertex) {  // remaining entries inside the last, partial byte
+    if (g_[beg_idx_v] != kUnassigned) ++base_rank;
+    ++beg_idx_v;
+  }
+  // cerr << "Base rank: " << base_rank << endl;
+  return base_rank;
+}
+
+}  // namespace cxxmph
--- a/cxxmph/mph_index.h
+++ b/cxxmph/mph_index.h
@@ -0,0 +1,230 @@
+#ifndef __CXXMPH_MPH_INDEX_H__
+#define __CXXMPH_MPH_INDEX_H__
+
+// Minimal perfect hash abstraction implementing the BDZ algorithm
+//
+// This is a data structure that given a set of known keys S, will create a
+// mapping from S to [0..|S|). The class is informed about S through the Reset
+// method and the mapping is queried by calling index(key).
+//
+// This is a pretty uncommon data structure, and if your application has a real
+// use case for it, chances are that it is a real win. If all you are doing is
+// a straightforward implementation of an in-memory associative mapping data
+// structure (e.g., mph_map.h), then it will probably be slower, since the
+// evaluation of index() is typically slower than the total cost of running a
+// traditional hash function over a key and doing 2-3 conflict resolutions on
+// 100byte-ish strings.
+//
+// Thesis presenting this and similar algorithms:
+// http://homepages.dcc.ufmg.br/~fbotelho/en/talks/thesis2008/thesis.pdf
+//
+//
+// Notes:
+//
+// Most users can use the SimpleMPHIndex wrapper instead of MPHIndex, which
+// has confusing template parameters.
+// This class only implements a minimal perfect hash function, it does not
+// implement an associative mapping data structure.
+
+#include <stdint.h>
+
+#include <cassert>
+#include <climits>
+#include <cmath>
+#include <unordered_map>  // for std::hash
+#include <vector>
+
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+#include "seeded_hash.h"
+#include "mph_bits.h"
+#include "trigraph.h"
+
+namespace cxxmph {
+
+// Minimal perfect hash index. Reset() builds the function for a key set;
+// index() then maps a key to its identifier. All templated members take the
+// seeded hash functor explicitly, and the same functor must be used for
+// Reset() and for every lookup.
+//
+// NOTE(review): the class keeps ranktable_ as a raw pointer and declares a
+// destructor but no copy constructor or assignment operator. Ranking()
+// delete[]s the table, so copying an instance looks unsafe -- confirm against
+// the destructor in mph_index.cc.
+class MPHIndex {
+ public:
+  // c: density parameter used to size the graph (see c_ below).
+  // b: log2 of the number of vertices covered by one rank table entry.
+  MPHIndex(double c = 1.23, uint8_t b = 7) :
+      c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
+      ranktable_(NULL), ranktable_size_(0) { }
+  ~MPHIndex();
+
+  // Builds the function for the keys in [begin, end); size is the number of
+  // keys. Returns false when no usable hash seeds were found.
+  template <class SeededHashFcn, class ForwardIterator>
+  bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size);
+  template <class SeededHashFcn, class Key>  // must agree with Reset
+  // Get a unique identifier for x, in the range [0;size()). If x wasn't part
+  // of the input in the last Reset call, returns a random value.
+  uint32_t index(const Key& x) const;
+  // Number of keys given to the last Reset call.
+  uint32_t size() const { return m_; }
+  void clear();
+
+  // Advanced user functions. Please avoid unless you know what you are doing.
+  // Number of vertices, i.e. the size of the range of perfect_hash().
+  uint32_t perfect_hash_size() const { return n_; }
+  // Non-minimal perfect hash: a vertex in [0;perfect_hash_size()).
+  template <class SeededHashFcn, class Key>  // must agree with Reset
+  uint32_t perfect_hash(const Key& x) const;
+  // Rank of perfect_hash(x); this is what index() returns.
+  template <class SeededHashFcn, class Key>  // must agree with Reset
+  uint32_t minimal_perfect_hash(const Key& x) const;
+
+  // Crazy functions. Ignore.
+  // Writes the raw seeded hash words of x into h.
+  template <class SeededHashFcn, class Key>  // must agree with Reset
+  void hash_vector(const Key& x, uint32_t* h) const;
+
+ private:
+  // Construction steps, run in this order by Reset().
+  template <class SeededHashFcn, class ForwardIterator>
+  bool Mapping(ForwardIterator begin, ForwardIterator end,
+               std::vector<TriGraph::Edge>* edges,
+               std::vector<uint32_t>* queue);
+  bool GenerateQueue(TriGraph* graph, std::vector<uint32_t>* queue);
+  void Assigning(const std::vector<TriGraph::Edge>& edges,
+                 const std::vector<uint32_t>& queue);
+  void Ranking();
+  uint32_t Rank(uint32_t vertex) const;
+
+  // Algorithm parameters
+  // Perfect hash function density. If this was a 2graph,
+  // then probability of having an acyclic graph would be
+  // sqrt(1-(2/c)^2). See section 3 for details.
+  // http://www.it-c.dk/people/pagh/papers/simpleperf.pdf
+  double c_;
+  uint8_t b_;  // Number of bits of the kth index in the ranktable
+
+  // Values used during generation
+  uint32_t m_;  // edges count
+  uint32_t n_;  // vertex count
+  // Set to 1 << b_ by Reset().
+  uint32_t k_;  // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
+
+  // Values used during search
+
+  // Partition vertex count, derived from c parameter.
+  uint32_t r_;
+  uint32_t nest_displacement_[3];  // derived from r_
+
+  // The array containing the minimal perfect hash function graph.
+  dynamic_2bitset g_;
+  uint8_t threebit_mod3[10];  // speed up mod3 calculation for 3bit ints
+  // The table used for the rank step of the minimal perfect hash function
+  const uint32_t* ranktable_;
+  uint32_t ranktable_size_;
+  // The selected hash seed triplet for finding the edges in the minimal
+  // perfect hash function graph.
+  // NOTE(review): the code in this header only ever reads hash_seed_[0].
+  uint32_t hash_seed_[3];
+};
+
+// Template method needs to go in the header file.
+// Rebuilds the index for the keys in [begin, end); `size` is the number of
+// keys. An empty range clears the index and succeeds. Otherwise up to 1000
+// random seed triplets are tried until Mapping() succeeds; returns false when
+// every attempt failed, true once Assigning() and Ranking() have run.
+template <class SeededHashFcn, class ForwardIterator>
+bool MPHIndex::Reset(
+    ForwardIterator begin, ForwardIterator end, uint32_t size) {
+  if (end == begin) {
+    clear();
+    return true;
+  }
+  m_ = size;
+  r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
+  if ((r_ % 2) == 0) r_ += 1;
+  // This can be used to speed mods, but increases occupation too much.
+  // Needs to try http://gmplib.org/manual/Integer-Exponentiation.html instead
+  // r_ = nextpoweroftwo(r_);
+  nest_displacement_[0] = 0;
+  nest_displacement_[1] = r_;
+  nest_displacement_[2] = (r_ << 1);
+  // Unsigned index: sizeof() is unsigned, so a signed counter triggers a
+  // signed/unsigned comparison.
+  for (uint32_t i = 0; i < sizeof(threebit_mod3); ++i) {
+    threebit_mod3[i] = static_cast<uint8_t>(i % 3);
+  }
+
+  n_ = 3*r_;
+  k_ = 1U << b_;
+
+  // cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
+
+  std::vector<TriGraph::Edge> edges;
+  std::vector<uint32_t> queue;
+  bool mapped = false;
+  for (int iterations = 1000; iterations > 0 && !mapped; --iterations) {
+    // cerr << "Iterations missing: " << iterations << endl;
+    for (int i = 0; i < 3; ++i) hash_seed_[i] = random();
+    mapped = Mapping<SeededHashFcn>(begin, end, &edges, &queue);
+  }
+  if (!mapped) return false;
+  Assigning(edges, queue);
+  // Release the edge storage before the rank table is built.
+  std::vector<TriGraph::Edge>().swap(edges);
+  Ranking();
+  return true;
+}
+
+// Hashes every key in [begin, end) into one edge of a fresh 3-partite graph
+// (one vertex per partition of r_ vertices) and asks GenerateQueue() to
+// process it. On success the edges are moved into *edges and true is
+// returned; otherwise returns false.
+template <class SeededHashFcn, class ForwardIterator>
+bool MPHIndex::Mapping(
+    ForwardIterator begin, ForwardIterator end,
+    std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
+  TriGraph graph(n_, m_);
+  for (ForwardIterator key = begin; key != end; ++key) {
+    uint32_t hashes[4];
+    SeededHashFcn().hash64(*key, hash_seed_[0], hashes);
+    // Partition p occupies vertices [p * r_, (p + 1) * r_).
+    const uint32_t vertex0 = hashes[0] % r_;
+    const uint32_t vertex1 = hashes[1] % r_ + r_;
+    const uint32_t vertex2 = hashes[2] % r_ + (r_ << 1);
+    graph.AddEdge(TriGraph::Edge(vertex0, vertex1, vertex2));
+  }
+  if (!GenerateQueue(&graph, queue)) return false;
+  graph.ExtractEdgesAndClear(edges);
+  return true;
+}
+
+// Writes the raw hash words of `key`, seeded with hash_seed_[0], into h. No
+// modulo is applied. Callers in this file pass a uint32_t[4] buffer.
+template <class SeededHashFcn, class Key>
+void MPHIndex::hash_vector(const Key& key, uint32_t* h) const {
+  const uint32_t seed = hash_seed_[0];
+  SeededHashFcn().hash64(key, seed, h);
+}
+
+// Maps `key` to one of its three candidate vertices. The candidate is picked
+// by the sum of the g_ values of the three vertices, reduced mod 3 through
+// the threebit_mod3 table. Returns 0 when g_ is empty.
+template <class SeededHashFcn, class Key>
+uint32_t MPHIndex::perfect_hash(const Key& key) const {
+  if (g_.size() == 0) return 0;
+  uint32_t h[4];
+  SeededHashFcn().hash64(key, hash_seed_[0], h);
+  // One candidate vertex per partition.
+  for (int part = 0; part < 3; ++part) {
+    h[part] = (h[part] % r_) + nest_displacement_[part];
+  }
+  assert((h[0]) < g_.size());
+  assert((h[1]) < g_.size());
+  assert((h[2]) < g_.size());
+  const uint8_t nest = threebit_mod3[
+      g_[h[0]] + g_[h[1]] + g_[h[2]]];
+  return h[nest];
+}
+// Minimal perfect hash: the rank of the vertex selected by perfect_hash().
+template <class SeededHashFcn, class Key>
+uint32_t MPHIndex::minimal_perfect_hash(const Key& key) const {
+  const uint32_t vertex = perfect_hash<SeededHashFcn, Key>(key);
+  return Rank(vertex);
+}
+
+// Public lookup entry point; forwards to minimal_perfect_hash().
+template <class SeededHashFcn, class Key>
+uint32_t MPHIndex::index(const Key& key) const {
+  const uint32_t id = minimal_perfect_hash<SeededHashFcn, Key>(key);
+  return id;
+}
+
+// Simple wrapper around MPHIndex to simplify calling code. Please refer to the
+// MPHIndex class for documentation.
+// Convenience wrapper that fixes the key type and the seeded hash functor
+// once, so callers do not have to repeat the template arguments on every
+// call. Each member forwards to the MPHIndex member of the same name.
+template <class Key, class HashFcn = typename seeded_hash<std::hash<Key>>::hash_function>
+class SimpleMPHIndex : public MPHIndex {
+ public:
+  template <class ForwardIterator>
+  bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size) {
+    return MPHIndex::Reset<HashFcn>(begin, end, size);
+  }
+
+  uint32_t index(const Key& key) const {
+    return MPHIndex::index<HashFcn>(key);
+  }
+
+  uint32_t perfect_hash(const Key& key) const {
+    return MPHIndex::perfect_hash<HashFcn>(key);
+  }
+
+  uint32_t minimal_perfect_hash(const Key& key) const {
+    return MPHIndex::minimal_perfect_hash<HashFcn>(key);
+  }
+
+  void hash_vector(const Key& key, uint32_t* h) const {
+    MPHIndex::hash_vector<HashFcn>(key, h);
+  }
+};
+
+}  // namespace cxxmph
+
+#endif // __CXXMPH_MPH_INDEX_H__
--- a/cxxmph/mph_index_test.cc
+++ b/cxxmph/mph_index_test.cc
@@ -0,0 +1,42 @@
+#include <algorithm>
+#include <cassert>
+#include <string>
+#include <vector>
+
+#include "mph_index.h"
+
+using std::string;
+using std::vector;
+using namespace cxxmph;
+
+// Builds an index over nine names and checks that the identifiers returned by
+// index() are exactly 0..8 once sorted.
+int main(int argc, char** argv) {
+  srand(1);
+  static const char* const kNames[] = {
+      "davi", "paulo", "joao", "maria", "bruno",
+      "paula", "diego", "diogo", "algume"};
+  vector<string> keys(kNames, kNames + sizeof(kNames) / sizeof(kNames[0]));
+
+  SimpleMPHIndex<string> mph_index;
+  const bool built = mph_index.Reset(keys.begin(), keys.end(), keys.size());
+  if (!built) { exit(-1); }
+
+  vector<int> ids;
+  for (vector<string>::const_iterator key = keys.begin();
+       key != keys.end(); ++key) {
+    ids.push_back(mph_index.index(*key));
+    cerr << " " << ids.back();
+  }
+  cerr << endl;
+
+  // A minimal perfect hash yields every id in [0, n) exactly once.
+  sort(ids.begin(), ids.end());
+  for (vector<int>::size_type i = 0; i < ids.size(); ++i) {
+    assert(ids[i] == static_cast<vector<int>::value_type>(i));
+  }
+  // Disabled serialization round trip:
+  /*
+  char* serialized = new char[mph_index.serialize_bytes_needed()];
+  mph_index.serialize(serialized);
+  SimpleMPHIndex<string> other_mph_index;
+  other_mph_index.deserialize(serialized);
+  */
+}
--- a/cxxmph/mph_map.h
+++ b/cxxmph/mph_map.h
@@ -0,0 +1,261 @@
+#ifndef __CXXMPH_MPH_MAP_H__
+#define __CXXMPH_MPH_MAP_H__
+// Implementation of the unordered associative mapping interface using a
+// minimal perfect hash function.
+//
+// This class is about 20% to 100% slower than unordered_map (or ext/hash_map)
+// and should not be used if performance is a concern. In fact, you should only
+// use it for educational purposes.
+//
+// See http://www.strchr.com/crc32_popcnt and new Murmur3 function to try to beat stl
+
+#include <algorithm>
+#include <iostream>
+#include <limits>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <utility>  // for std::pair
+
+#include "mph_bits.h"
+#include "mph_index.h"
+#include "hollow_iterator.h"
+
+namespace cxxmph {
+
+using std::pair;
+using std::make_pair;
+using std::unordered_map;
+using std::vector;
+
+// Save on repetitive typing.
+// MPH_MAP_TMPL_SPEC: the template header shared by every out-of-class member.
+// MPH_MAP_CLASS_SPEC: the fully parameterized class name.
+// MPH_MAP_METHOD_DECL(r, m): starts the out-of-class definition of member m
+// whose return type is the nested type r. Because r is looked up inside the
+// class, the class declares helper typedefs (void_type, bool_type, ...) for
+// return types that are not already member types.
+#define MPH_MAP_TMPL_SPEC template <class Key, class Data, class HashFcn, class EqualKey, class Alloc>
+#define MPH_MAP_CLASS_SPEC mph_map<Key, Data, HashFcn, EqualKey, Alloc>
+#define MPH_MAP_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
+
+// Unordered associative container backed by a perfect hash. Elements live in
+// values_, with present_ marking which slots are occupied. After pack() an
+// element sits at the slot given by index_.perfect_hash(key); elements
+// inserted since the last pack() are appended to values_ and located through
+// the slack_ map (key -> position in values_).
+template <class Key, class Data, class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>, class Alloc = std::allocator<Data> >
+class mph_map {
+ public:
+  typedef Key key_type;
+  typedef Data data_type;
+  typedef pair<Key, Data> value_type;
+  typedef HashFcn hasher;
+  typedef EqualKey key_equal;
+
+  typedef typename std::vector<value_type>::pointer pointer;
+  typedef typename std::vector<value_type>::reference reference;
+  typedef typename std::vector<value_type>::const_reference const_reference;
+  typedef typename std::vector<value_type>::size_type size_type;
+  typedef typename std::vector<value_type>::difference_type difference_type;
+
+  typedef hollow_iterator<std::vector<value_type>> iterator;
+  typedef hollow_const_iterator<std::vector<value_type>> const_iterator;
+
+  // For making macros simpler.
+  typedef void void_type;
+  typedef bool bool_type;
+  typedef pair<iterator, bool> insert_return_type;
+
+  mph_map();
+  ~mph_map();
+
+  iterator begin();
+  iterator end();
+  const_iterator begin() const;
+  const_iterator end() const;
+  size_type size() const;
+  bool empty() const;
+  void clear();
+  void erase(iterator pos);
+  void erase(const key_type& k);
+  // Returns the element's iterator and whether a new element was inserted.
+  pair<iterator, bool> insert(const value_type& x);
+  // Lookups return end() when the key is absent.
+  iterator find(const key_type& k) { return slow_find(k, index_.perfect_hash(k)); }
+  const_iterator find(const key_type& k) const { return slow_find(k, index_.perfect_hash(k)); };
+  typedef int32_t my_int32_t;  // help macros
+  // Perfect hash value of k, or -1 when the index is empty.
+  int32_t index(const key_type& k) const;
+  // Inserts a default-constructed value when k is absent.
+  data_type& operator[](const key_type &k);
+  // NOTE(review): no definition of the const overload is visible in this
+  // header -- confirm before using it.
+  const data_type& operator[](const key_type &k) const;
+
+  size_type bucket_count() const { return index_.perfect_hash_size() + slack_.bucket_count(); }
+  // Repacks the container; the requested bucket count is not used.
+  void rehash(size_type nbuckets /*ignored*/); 
+
+ protected:  // mimicking STL implementation
+  EqualKey equal_;
+
+ private:
+   // Iterator adaptor whose operator* yields only the key (pair::first), so a
+   // range of elements can be handed to code expecting a range of keys.
+   template <typename iterator>
+   struct iterator_first : public iterator {
+     iterator_first(iterator it) : iterator(it) { }
+     const typename iterator::value_type::first_type& operator*() {
+      return this->iterator::operator*().first;
+     }
+   };
+
+   template <typename iterator>
+     iterator_first<iterator> make_iterator_first(iterator it) {
+     return iterator_first<iterator>(it);
+   }
+
+   // Wrap a raw vector iterator together with values_ and present_.
+   iterator make_iterator(typename std::vector<value_type>::iterator it) {
+     return hollow_iterator<std::vector<value_type>>(&values_, &present_, it);
+   }
+   const_iterator make_iterator(typename std::vector<value_type>::const_iterator it) const {
+     return hollow_const_iterator<std::vector<value_type>>(&values_, &present_, it);
+   }
+
+   // Experimental functions, not always faster
+   // NOTE(review): fast_find is declared but no definition is visible in this
+   // header.
+   iterator fast_find(const key_type& k);
+   iterator slow_find(const key_type& k, uint32_t perfect_hash);
+   const_iterator slow_find(const key_type& k, uint32_t perfect_hash) const;
+
+   // Rebuilds index_ and relocates the elements to their hashed slots.
+   void pack();
+   std::vector<value_type> values_;
+   std::vector<bool> present_;  // present_[i] tells whether values_[i] is live
+   SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
+   // TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
+   typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
+   slack_type slack_;
+   size_type size_;  // number of live elements
+
+   // Lookup statistics; mutable so that const lookups can update them.
+   mutable uint64_t fast_;
+   mutable uint64_t fast_taken_;
+   mutable uint64_t slow_;
+   mutable uint64_t very_slow_;
+};
+
+// Two maps compare equal when they hold the same number of elements and
+// their iteration sequences match element by element.
+MPH_MAP_TMPL_SPEC
+bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
+  if (lhs.size() != rhs.size()) return false;
+  return std::equal(lhs.begin(), lhs.end(), rhs.begin());
+}
+
+// Creates an empty map. The lookup counters are zeroed here because
+// slow_find() increments very_slow_ during the first insert(), before
+// insert() itself resets the counters.
+MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map()
+    : size_(0), fast_(0), fast_taken_(0), slow_(0), very_slow_(0) {
+  clear();
+  pack();
+}
+
+// Nothing to release explicitly; the commented-out line below dumps the
+// lookup counters when re-enabled.
+MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
+  // fprintf(stderr, "Fast taken: %d Fast: %d Slow %d very_slow %d ratio %f\n", fast_taken_, fast_, slow_, very_slow_, fast_*1.0/slow_);
+}
+
+// Inserts x unless its key is already present. Returns the iterator of the
+// element with that key and whether an insertion took place. New elements
+// are appended to values_ and registered in slack_. When the append happens
+// on a full vector holding more than 256 elements, the map is repacked.
+MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
+  iterator existing = find(x.first);
+  if (existing != end()) return insert_return_type(existing, false);
+
+  // Decided before the push_back, which may change the capacity.
+  const bool should_pack =
+      values_.capacity() == values_.size() && values_.size() > 256;
+  values_.push_back(x);
+  present_.push_back(true);
+  ++size_;
+  slack_.insert(make_pair(x.first, values_.size() - 1));
+  if (should_pack) pack();
+
+  iterator inserted = find(x.first);
+  // Reset the lookup statistics.
+  fast_ = 0;
+  fast_taken_ = 0;
+  slow_ = 0;
+  very_slow_ = 0;
+  return insert_return_type(inserted, true);
+}
+
+// Rebuilds index_ over the keys reachable through begin()/end() and copies
+// each of those elements into the slot chosen by index_.perfect_hash().
+// Afterwards slack_ is empty. Does nothing when values_ is empty.
+MPH_MAP_METHOD_DECL(void_type, pack)() {
+  // fprintf(stderr, "Paki %d values\n", values_.size());
+  if (values_.empty()) return;
+  // Debug-only sanity check: the stored keys must be unique.
+  assert(std::unordered_set<key_type>(make_iterator_first(begin()), make_iterator_first(end())).size() == size());
+  bool success = index_.Reset(
+      make_iterator_first(begin()),
+      make_iterator_first(end()), size_);
+  // NOTE(review): when asserts are compiled out, a failed Reset goes
+  // unnoticed here.
+  assert(success);
+  // One slot per perfect hash value, all initially marked absent. Twice that
+  // capacity is reserved so later appends do not reallocate immediately.
+  std::vector<value_type> new_values(index_.perfect_hash_size());
+  new_values.reserve(new_values.size() * 2);
+  std::vector<bool> new_present(index_.perfect_hash_size(), false);
+  new_present.reserve(new_present.size() * 2);
+  for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
+    size_type id = index_.perfect_hash(it->first);
+    assert(id < new_values.size());
+    new_values[id] = *it;
+    new_present[id] = true;
+  }
+  // fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
+  values_.swap(new_values);
+  present_.swap(new_present);
+  // Every element now sits at its hashed slot, so the slack map is dropped.
+  slack_type().swap(slack_);
+}
+
+// Iteration covers the whole values_ vector; the hollow iterators receive
+// present_ as well (see make_iterator). size()/empty() report the number of
+// live elements tracked in size_, not values_.size().
+MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
+MPH_MAP_METHOD_DECL(iterator, end)() { return make_iterator(values_.end()); }
+MPH_MAP_METHOD_DECL(const_iterator, begin)() const { return make_iterator(values_.begin()); }
+MPH_MAP_METHOD_DECL(const_iterator, end)() const { return make_iterator(values_.end()); }
+MPH_MAP_METHOD_DECL(bool_type, empty)() const { return size_ == 0; }
+MPH_MAP_METHOD_DECL(size_type, size)() const { return size_; }
+
+// Removes every element and resets the perfect hash index.
+MPH_MAP_METHOD_DECL(void_type, clear)() {
+  size_ = 0;
+  index_.clear();
+  slack_.clear();
+  present_.clear();
+  values_.clear();
+}
+
+// Erases the element at pos, which must refer to a live element. The slot is
+// marked absent and reset to a default value rather than removed from
+// values_, so the positions of the other elements do not change.
+MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
+  // Position of the element inside values_. This relies on the iterator
+  // dereferencing to a reference into values_, as the assignment through
+  // *pos below already requires.
+  const size_type position =
+      static_cast<size_type>(&(*pos) - &values_[0]);
+  // Drop any slack entry first, while the key is still intact. Otherwise
+  // slow_find() would keep resolving the key to the erased slot.
+  slack_.erase(pos->first);
+  present_[position] = false;
+  *pos = value_type();
+  --size_;
+}
+// Erases the element with key k; a missing key is ignored.
+MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
+  iterator found = find(k);
+  if (found != end()) erase(found);
+}
+
+// Const lookup. Tries the slot given by the precomputed perfect hash first,
+// then falls back to slack_ for elements inserted since the last pack().
+// Returns end() when the key is absent.
+MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const {
+  if (__builtin_expect(index_.perfect_hash_size(), 1) &&
+      __builtin_expect(present_[perfect_hash], true)) {
+    // The slot is occupied, but possibly by a different key.
+    auto candidate = values_.begin() + perfect_hash;
+    if (equal_(k, candidate->first)) return make_iterator(candidate);
+  }
+  if (__builtin_expect(slack_.empty(), 1)) return end();
+  ++very_slow_;
+  auto slack_entry = slack_.find(k);
+  if (slack_entry == slack_.end()) return end();
+  return make_iterator(values_.begin() + slack_entry->second);
+}
+
+// Mutable lookup. Tries the slot given by the precomputed perfect hash first,
+// then falls back to slack_ for elements inserted since the last pack().
+// Returns end() when the key is absent.
+MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_hash) {
+  if (__builtin_expect(index_.perfect_hash_size(), 1) &&
+      __builtin_expect(present_[perfect_hash], true)) {
+    // The slot is occupied, but possibly by a different key.
+    auto candidate = values_.begin() + perfect_hash;
+    if (equal_(k, candidate->first)) return make_iterator(candidate);
+  }
+  if (__builtin_expect(slack_.empty(), 1)) return end();
+  ++very_slow_;
+  auto slack_entry = slack_.find(k);
+  if (slack_entry == slack_.end()) return end();
+  return make_iterator(values_.begin() + slack_entry->second);
+}
+
+// Perfect hash value of k, or -1 while the index holds no keys.
+MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
+  if (index_.size() != 0) {
+    return index_.perfect_hash(k);
+  }
+  return -1;
+}
+
+// Returns the value stored under k, inserting a default-constructed value
+// first when the key is absent.
+MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
+  insert_return_type slot = insert(make_pair(k, data_type()));
+  return slot.first->second;
+}
+// Repacks the map and then replaces values_ and present_ with freshly
+// constructed copies, discarding the extra capacity pack() reserved. The
+// nbuckets argument is not used.
+MPH_MAP_METHOD_DECL(void_type, rehash)(size_type nbuckets) {
+  pack();
+  vector<value_type> trimmed_values(values_.begin(), values_.end());
+  trimmed_values.swap(values_);
+  vector<bool> trimmed_present(present_.begin(), present_.end());
+  trimmed_present.swap(present_);
+  slack_type empty_slack;
+  empty_slack.swap(slack_);
+}
+
+
+}  // namespace cxxmph
+
+#endif  // __CXXMPH_MPH_MAP_H__
--- a/cxxmph/mph_map_test.cc
+++ b/cxxmph/mph_map_test.cc
@@ -0,0 +1,61 @@
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include "mph_map.h"
+
+using std::make_pair;
+using std::string;
+using cxxmph::mph_map;
+
+// Fills a map with 10000 identity pairs, repacks it, and verifies one million
+// lookups. Exits with -1 on the first failed lookup.
+int main(int argc, char** argv) {
+  mph_map<int64_t, int64_t> b;
+  const int32_t num_keys = 1000*10;
+  for (int key = 0; key < num_keys; ++key) {
+    b.insert(make_pair(key, key));
+  }
+  b.rehash(b.size());
+
+  for (int i = 0; i < 1000000; ++i) {
+    const int wanted = i % num_keys;
+    auto it = b.find(wanted);
+    if (it == b.end()) {
+      std::cerr << "Failed to find " << i << std::endl;
+      exit(-1);
+    }
+    const bool consistent = it->first == it->second && it->first == wanted;
+    if (!consistent) {
+      std::cerr << "Found " << it->first << " looking for " << i << std::endl;
+      exit(-1);
+    }
+  }
+  // Disabled string-keyed scenario:
+  /*
+  mph_map<string, int> h;
+  h.insert(std::make_pair("-1",-1));
+  mph_map<string, int>::const_iterator it;
+  for (it = h.begin(); it != h.end(); ++it) {
+    if (it->second != -1) exit(-1);
+  }
+  int32_t num_valid = 100;
+  for (int i = 0; i < num_valid; ++i) {
+     char buf[10];
+     snprintf(buf, 10, "%d", i);
+     h.insert(std::make_pair(buf, i));
+  }
+  for (int j = 0; j < 100; ++j) {
+    for (int i = 1000; i > 0; --i) {
+       char buf[10];
+       snprintf(buf, 10, "%d", i - 1);
+       auto it = h.find(buf);
+       if (i < num_valid && it->second != i - 1) exit(-1);
+    }
+  }
+  for (int j = 0; j < 100; ++j) {
+    for (int i = 1000; i > 0; --i) {
+       char buf[10];
+       int key = i*100 - 1;
+       snprintf(buf, 10, "%d", key);
+       auto it = h.find(buf);
+       if (key < num_valid && it->second != key) exit(-1);
+    }
+  }
+  */
+}
--- a/cxxmph/seeded_hash.h
+++ b/cxxmph/seeded_hash.h
@@ -0,0 +1,154 @@
+#ifndef __CXXMPH_SEEDED_HASH_H__
+#define __CXXMPH_SEEDED_HASH_H__
+
+#include <stdint.h>  // for uint32_t and friends
+
+#include <cstdlib>
+#include <unordered_map>  // for std::hash
+
+#include "MurmurHash3.h"
+#include "stringpiece.h"
+
+// From murmur, only used naively to extend 32 bits functions to 128 bits.
+uint32_t fmix ( uint32_t h );
+// Used for a quick and dirty hash function for integers. Probably a bad idea.
+uint64_t fmix ( uint64_t h );
+
+namespace cxxmph {
+
+// Generic adaptor that turns an unseeded hash functor into a seeded one by
+// XOR-ing the seed into the low 32 bits of the functor's result.
+template <class HashFcn>
+struct seeded_hash_function {
+  // Single 32-bit seeded hash of k.
+  template <class Key>
+  uint32_t operator()(const Key& k, uint32_t seed) const {
+    return HashFcn()(k) ^ seed;
+  }
+  // Fills out[0..3] with four seeded hashes of k. The seed for each
+  // successive word is derived from the previous one through fmix().
+  template <class Key>
+  void hash64(const Key& k, uint32_t seed, uint32_t* out) const {
+    // The key hash does not depend on the seed, so it is computed once
+    // instead of once per output word.
+    const uint32_t base = static_cast<uint32_t>(HashFcn()(k));
+    for (int i = 0; i < 4; ++i) {
+      out[i] = base ^ seed;
+      seed = fmix(seed);
+    }
+  }
+};
+
+// Unseeded MurmurHash3 over the raw bytes of the key object (sizeof(Key)
+// bytes starting at &k), always using the fixed seed 1.
+struct Murmur3 {
+  template<class Key>
+  uint32_t operator()(const Key& k) const {
+    const void* bytes = static_cast<const void*>(&k);
+    uint32_t digest;
+    MurmurHash3_x86_32(bytes, sizeof(Key), 1 /* seed */, &digest);
+    return digest;
+  }
+  // Writes the 128-bit variant's result through out.
+  template <class Key>
+  void hash64(const Key& k, uint32_t* out) const {
+    const void* bytes = static_cast<const void*>(&k);
+    MurmurHash3_x64_128(bytes, sizeof(Key), 1 /* seed */, out);
+  }
+};
+
+// Unseeded MurmurHash3 over the characters of any key convertible to
+// StringPiece, always using the fixed seed 1.
+struct Murmur3StringPiece {
+  template <class Key>
+  uint32_t operator()(const Key& k) const {
+    StringPiece text(k);
+    uint32_t digest;
+    MurmurHash3_x86_32(text.data(), text.length(), 1 /* seed */, &digest);
+    return digest;
+  }
+  // Writes the 128-bit variant's result through out.
+  template <class Key>
+  void hash64(const Key& k, uint32_t* out) const {
+    StringPiece text(k);
+    MurmurHash3_x64_128(text.data(), text.length(), 1 /* seed */, out);
+  }
+};
+
+// Quick hash for integer keys built on the fmix() finalizers declared above.
+struct Murmur3Fmix64bitsType {
+  // NOTE(review): reads 8 bytes starting at &k whatever sizeof(Key) is, so
+  // this presumably expects a 64-bit key type -- confirm against callers.
+  template <class Key>
+  uint32_t operator()(const Key& k) const {
+    return fmix(*reinterpret_cast<const uint64_t*>(&k));
+  }
+  // Stores fmix(k) as one 64-bit value over out[0..1], then writes fmix of
+  // out[0] into out[2]. out[3] is not written.
+  // NOTE(review): which fmix overload handles k depends on Key.
+  template <class Key>
+  void hash64(const Key& k, uint32_t* out) const {
+    *reinterpret_cast<uint64_t*>(out) = fmix(k);
+    *(out + 2) = fmix(*out);
+  }
+};
+
+// Seeded MurmurHash3 over the raw bytes of the key object: the seed is passed
+// straight to MurmurHash3 instead of being XOR-ed in afterwards.
+template <>
+struct seeded_hash_function<Murmur3> {
+  template <class Key>
+  uint32_t operator()(const Key& k, uint32_t seed) const {
+    const void* bytes = static_cast<const void*>(&k);
+    uint32_t digest;
+    MurmurHash3_x86_32(bytes, sizeof(Key), seed, &digest);
+    return digest;
+  }
+  // Writes the 128-bit variant's result through out.
+  template <class Key>
+  void hash64(const Key& k, uint32_t seed, uint32_t* out) const {
+    const void* bytes = static_cast<const void*>(&k);
+    MurmurHash3_x64_128(bytes, sizeof(Key), seed, out);
+  }
+};
+
+// Seeded MurmurHash3 over the characters of any key convertible to
+// StringPiece: the seed is passed straight to MurmurHash3.
+template <>
+struct seeded_hash_function<Murmur3StringPiece> {
+  template <class Key>
+  uint32_t operator()(const Key& k, uint32_t seed) const {
+    StringPiece text(k);
+    uint32_t digest;
+    MurmurHash3_x86_32(text.data(), text.length(), seed, &digest);
+    return digest;
+  }
+  // Writes the 128-bit variant's result through out.
+  template <class Key>
+  void hash64(const Key& k, uint32_t seed, uint32_t* out) const {
+    StringPiece text(k);
+    MurmurHash3_x64_128(text.data(), text.length(), seed, out);
+  }
+};
+
+// Seeded variant of the fmix-based integer hash.
+// NOTE(review): operator() mixes the seed in by addition while hash64 uses
+// XOR -- confirm that the difference is intended.
+template <>
+struct seeded_hash_function<Murmur3Fmix64bitsType> {
+  template <class Key>
+  uint32_t operator()(const Key& k, uint32_t seed) const {
+    return fmix(k + seed);
+  }
+  // Stores fmix(k ^ seed) as one 64-bit value over out[0..1], then writes
+  // fmix of out[0] into out[2]. out[3] is not written.
+  template <class Key>
+  void hash64(const Key& k, uint32_t seed, uint32_t* out) const {
+    *reinterpret_cast<uint64_t*>(out) = fmix(k ^ seed);
+    *(out + 2) = fmix(*out);
+  }
+};
+
+
+// Trait that selects the seeded hash functor for an unseeded functor type.
+// The primary template wraps HashFcn in the generic seeded_hash_function.
+template <class HashFcn> struct seeded_hash
+{ typedef seeded_hash_function<HashFcn> hash_function; };
+// Use Murmur3 instead for all types defined in std::hash, plus
+// std::string which is commonly extended.
+// String-like keys hash their characters through Murmur3StringPiece.
+template <> struct seeded_hash<std::hash<char*> >
+{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
+template <> struct seeded_hash<std::hash<const char*> >
+{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
+template <> struct seeded_hash<std::hash<std::string> >
+{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
+template <> struct seeded_hash<std::hash<cxxmph::StringPiece> >
+{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
+
+// Integer keys hash the raw bytes of the value through Murmur3.
+template <> struct seeded_hash<std::hash<char> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<unsigned char> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<short> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<unsigned short> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<int> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<unsigned int> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<long> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<unsigned long> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<long long> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+template <> struct seeded_hash<std::hash<unsigned long long> >
+{ typedef seeded_hash_function<Murmur3> hash_function; };
+
+}  // namespace cxxmph
+
+#endif  // __CXXMPH_SEEDED_HASH_H__
--- a/cxxmph/stringpiece.h
+++ b/cxxmph/stringpiece.h
@@ -0,0 +1,182 @@
+// Copyright 2001-2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece.  The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other Google classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+//
+// Arghh!  I wish C++ literals were "string".
+
+#ifndef CXXMPH_STRINGPIECE_H__
+#define CXXMPH_STRINGPIECE_H__
+
+#include <cstddef>
+#include <string.h>
+#include <iosfwd>
+#include <string>
+
+namespace cxxmph {
+
+// Non-owning view over a character range: a pointer plus an int length. The
+// viewed memory is neither copied nor freed by this class.
+// NOTE(review): the class uses std::min and std::reverse_iterator, but this
+// header includes neither <algorithm> nor <iterator> directly.
+class StringPiece {
+ private:
+  const char*   ptr_;
+  int           length_;
+
+ public:
+  // We provide non-explicit singleton constructors so users can pass
+  // in a "const char*" or a "string" wherever a "StringPiece" is
+  // expected.
+  StringPiece() : ptr_(NULL), length_(0) { }
+  // A NULL str yields an empty piece.
+  StringPiece(const char* str)
+    : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
+  StringPiece(const std::string& str)
+    : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
+  StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }
+
+  // data() may return a pointer to a buffer with embedded NULs, and the
+  // returned buffer may or may not be null terminated.  Therefore it is
+  // typically a mistake to pass data() to a routine that expects a NUL
+  // terminated string.
+  const char* data() const { return ptr_; }
+  int size() const { return length_; }
+  int length() const { return length_; }
+  bool empty() const { return length_ == 0; }
+
+  void clear() { ptr_ = NULL; length_ = 0; }
+  void set(const char* data, int len) { ptr_ = data; length_ = len; }
+  void set(const char* str) {
+    ptr_ = str;
+    if (str != NULL)
+      length_ = static_cast<int>(strlen(str));
+    else
+      length_ = 0;
+  }
+  void set(const void* data, int len) {
+    ptr_ = reinterpret_cast<const char*>(data);
+    length_ = len;
+  }
+
+  // No bounds checking is performed.
+  char operator[](int i) const { return ptr_[i]; }
+
+  // Drops the first n characters; n is not checked against the length.
+  void remove_prefix(int n) {
+    ptr_ += n;
+    length_ -= n;
+  }
+
+  // Drops the last n characters; n is not checked against the length.
+  void remove_suffix(int n) {
+    length_ -= n;
+  }
+
+  // Three-way comparison: bytewise over the common prefix, then by length.
+  int compare(const StringPiece& x) const {
+    int r = memcmp(ptr_, x.ptr_, std::min(length_, x.length_));
+    if (r == 0) {
+      if (length_ < x.length_) r = -1;
+      else if (length_ > x.length_) r = +1;
+    }
+    return r;
+  }
+
+  std::string as_string() const {
+    return std::string(data(), size());
+  }
+  // We also define ToString() here, since many other string-like
+  // interfaces name the routine that converts to a C++ string
+  // "ToString", and it's confusing to have the method that does that
+  // for a StringPiece be called "as_string()".  We also leave the
+  // "as_string()" method defined here for existing code.
+  std::string ToString() const {
+    return std::string(data(), size());
+  }
+
+  // Declared only; no definitions are visible in this header.
+  void CopyToString(std::string* target) const;
+  void AppendToString(std::string* target) const;
+
+  // Does "this" start with "x"
+  bool starts_with(const StringPiece& x) const {
+    return ((length_ >= x.length_) &&
+            (memcmp(ptr_, x.ptr_, x.length_) == 0));
+  }
+
+  // Does "this" end with "x"
+  bool ends_with(const StringPiece& x) const {
+    return ((length_ >= x.length_) &&
+            (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
+  }
+
+  // standard STL container boilerplate
+  typedef char value_type;
+  typedef const char* pointer;
+  typedef const char& reference;
+  typedef const char& const_reference;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  static const size_type npos;
+  typedef const char* const_iterator;
+  typedef const char* iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  iterator begin() const { return ptr_; }
+  iterator end() const { return ptr_ + length_; }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(ptr_ + length_);
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(ptr_);
+  }
+  // STLS says return size_type, but Google says return int
+  int max_size() const { return length_; }
+  int capacity() const { return length_; }
+
+  // The search and copy helpers below are declared only; no definitions are
+  // visible in this header.
+  int copy(char* buf, size_type n, size_type pos = 0) const;
+
+  int find(const StringPiece& s, size_type pos = 0) const;
+  int find(char c, size_type pos = 0) const;
+  int rfind(const StringPiece& s, size_type pos = npos) const;
+  int rfind(char c, size_type pos = npos) const;
+
+  StringPiece substr(size_type pos, size_type n = npos) const;
+};
+
+// Equal when both pieces have the same length and the same bytes.
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+  if (x.length() != y.length()) return false;
+  return memcmp(x.data(), y.data(), x.length()) == 0;
+}
+
+// Negation of operator==.
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+  const bool same = (x == y);
+  return !same;
+}
+
+// Lexicographic byte order; when one piece is a prefix of the other, the
+// shorter one sorts first.
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+  const int common = std::min(x.size(), y.size());
+  const int r = memcmp(x.data(), y.data(), common);
+  if (r < 0) return true;
+  return (r == 0) && (x.size() < y.size());
+}
+
+// x > y exactly when y < x.
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+  const bool y_is_smaller = (y < x);
+  return y_is_smaller;
+}
+
+// x <= y exactly when x is not greater than y.
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+  const bool greater = (x > y);
+  return !greater;
+}
+
+// x >= y exactly when x is not less than y. Both operands are taken by const
+// reference, like the other comparison operators, so const objects and
+// temporaries are accepted on the right-hand side as well.
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+  return !(x < y);
+}
+
+}  // namespace cxxmph
+
+// Streams the characters of the piece (through a std::string copy) so a
+// StringPiece can be logged.
+inline std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece) {
+  return o << piece.as_string();
+}
+
+#endif  // CXXMPH_STRINGPIECE_H__
--- a/cxxmph/trigraph.cc
+++ b/cxxmph/trigraph.cc
@@ -0,0 +1,81 @@
+#include <cassert>
+#include <limits>
+#include <iostream>
+
+#include "trigraph.h"
+
+using std::cerr;
+using std::endl;
+using std::vector;
+
+namespace {
+static const uint32_t kInvalidEdge = std::numeric_limits<uint32_t>::max();  // sentinel edge id: "no edge" / end of a vertex's edge list
+} // anonymous namespace
+
+namespace cxxmph {
+
+// Builds an empty graph with room for `nedges` edges over `nvertices`
+// vertices. Every vertex starts with degree 0 and no first edge.
+TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges)
+    : nedges_(0), edges_(nedges), next_edge_(nedges),
+      first_edge_(nvertices, kInvalidEdge), vertex_degree_(nvertices, 0) {
+}
+
+// Swaps the edge array with *edges (so *edges receives the graph's edges and
+// the graph receives whatever *edges held), resets the edge counter and
+// releases the storage of the auxiliary vectors.
+void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
+  edges_.swap(*edges);
+  nedges_ = 0;
+  // Swapping with a scoped empty vector hands the old storage to the
+  // temporary, which frees it when the scope closes.
+  { vector<Edge> released; released.swap(next_edge_); }
+  { vector<uint32_t> released; released.swap(first_edge_); }
+  { vector<uint8_t> released; released.swap(vertex_degree_); }
+}
+// Stores `edge` as edge number nedges_ and pushes it onto the front of the
+// edge list of each of its three vertices.
+void TriGraph::AddEdge(const Edge& edge) {
+  // Validate every index before any array is written.
+  assert(edges_.size() > nedges_);
+  assert(next_edge_.size() > nedges_);
+  assert(first_edge_.size() > edge[0]);
+  assert(first_edge_.size() > edge[1]);
+  assert(first_edge_.size() > edge[2]);
+  edges_[nedges_] = edge;
+  // The current list heads become this edge's successors...
+  next_edge_[nedges_] = Edge(
+      first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
+  // ...and this edge becomes the new head for all three vertices.
+  first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
+  // NOTE(review): degrees are kept in uint8_t elements (see vertex_degree_),
+  // so a vertex touched by more than 255 edges wraps around.
+  ++vertex_degree_[edge[0]];
+  ++vertex_degree_[edge[1]];
+  ++vertex_degree_[edge[2]];
+  ++nedges_;
+}
+
+void TriGraph::RemoveEdge(uint32_t current_edge) {
+  // Unlinks current_edge from the edge list of each of its three vertices and decrements their degrees; edges_ and nedges_ are left untouched.
+  for (int i = 0; i < 3; ++i) {
+    uint32_t vertex = edges_[current_edge][i];
+    uint32_t edge1 = first_edge_[vertex];  // cursor walking this vertex's list
+    uint32_t edge2 = kInvalidEdge;  // predecessor of edge1 in the list, if any
+    uint32_t j = 0;  // slot that `vertex` occupies inside edge2
+    while (edge1 != current_edge && edge1 != kInvalidEdge) {
+      edge2 = edge1;
+      if (edges_[edge1][0] == vertex) j = 0;
+      else if (edges_[edge1][1] == vertex) j = 1;
+      else j = 2;
+      edge1 = next_edge_[edge1][j];
+    }
+    assert(edge1 != kInvalidEdge);  // current_edge must be on this vertex's list
+    if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i];  // splice it out from behind its predecessor
+    else first_edge_[vertex] = next_edge_[edge1][i];  // it was the list head
+    --vertex_degree_[vertex];
+  }
+}
+
+// Writes every edge slot (its three vertices and its three next links),
+// followed by the first edge of every vertex, to stderr.
+void TriGraph::DebugGraph() const {
+  // The counters use the vectors' own unsigned size type; an int counter
+  // would mix signedness in the loop conditions.
+  for (vector<Edge>::size_type i = 0; i < edges_.size(); i++) {
+    cerr << i << "  " << edges_[i][0] << " " << edges_[i][1] << " " << edges_[i][2]
+         << " nexts " << next_edge_[i][0] << " " << next_edge_[i][1] << " " << next_edge_[i][2] << endl;
+  }
+  for (vector<uint32_t>::size_type i = 0; i < first_edge_.size(); i++) {
+    cerr << "first for vertice " << i << " " << first_edge_[i] << endl;
+  }
+}
+
+     
+}  // namespace cxxmph
--- a/cxxmph/trigraph.h
+++ b/cxxmph/trigraph.h
@@ -0,0 +1,48 @@
+#ifndef __CXXMPH_TRIGRAPH_H__
+#define __CXXMPH_TRIGRAPH_H__
+// Build a trigraph using a memory efficient representation.
+//
+// Prior knowledge of the number of edges and vertices for the graph is
+// required. For each vertex, we store how many edges touch it (degree) and the
+// index of the first edge in the vector of triples representing the edges.
+
+#include <stdint.h>  // for uint32_t and friends
+
+#include <vector>
+
+namespace cxxmph {
+
+// A graph whose edges each connect exactly three vertices. Edges live in a
+// preallocated array and are additionally chained into one singly linked list
+// per vertex, so an edge can be unlinked without moving the others.
+class TriGraph {
+ public:
+  // Three 32-bit ids addressed by slot 0..2. Used both for the vertices of an
+  // edge and, internally, for the per-slot "next edge" links.
+  struct Edge {
+    Edge() { }  // leaves the three ids uninitialized
+    Edge(uint32_t v0, uint32_t v1, uint32_t v2) {
+      vertices[0] = v0;
+      vertices[1] = v1;
+      vertices[2] = v2;
+    }
+    uint32_t& operator[](uint8_t v) { return vertices[v]; }
+    const uint32_t& operator[](uint8_t v) const { return vertices[v]; }
+    uint32_t vertices[3];
+  };
+  // Reserves room for `nedges` edges over vertices numbered [0, nvertices).
+  // The vertex count comes first, matching the definition in trigraph.cc.
+  TriGraph(uint32_t nvertices, uint32_t nedges);
+  // Appends an edge; no more than the `nedges` given at construction fit.
+  void AddEdge(const Edge& edge);
+  // Unlinks edge `edge_id` from its vertices' lists and lowers their degrees.
+  void RemoveEdge(uint32_t edge_id);
+  // Swaps the edge array into *edges and frees the auxiliary vectors.
+  void ExtractEdgesAndClear(std::vector<Edge>* edges);
+  // Dumps edges, next links and per-vertex first edges to stderr.
+  void DebugGraph() const;
+
+  const std::vector<Edge>& edges() const { return edges_; }
+  const std::vector<uint8_t>& vertex_degree() const { return vertex_degree_; }
+  const std::vector<uint32_t>& first_edge() const { return first_edge_; }
+
+ private:
+  uint32_t nedges_;  // number of edges added so far
+  std::vector<Edge> edges_;
+  std::vector<Edge> next_edge_;  // for implementing removal
+  std::vector<uint32_t> first_edge_;  // the first edge for this vertex
+  std::vector<uint8_t> vertex_degree_;  // number of edges for this vertex
+};
+
+}  // namespace cxxmph
+
+#endif  // __CXXMPH_TRIGRAPH_H__
--- a/cxxmph/trigraph_test.cc
+++ b/cxxmph/trigraph_test.cc
@@ -0,0 +1,22 @@
+#include <cassert>
+
+#include "trigraph.h"
+
+using cxxmph::TriGraph;
+
+// Builds a 4-vertex graph with two edges that share vertices 1 and 2, then
+// checks the vertex degrees before and after removing the first edge.
+int main(int argc, char** argv) {
+  TriGraph graph(4, 2);
+  graph.AddEdge(TriGraph::Edge(0, 1, 2));
+  graph.AddEdge(TriGraph::Edge(1, 3, 2));
+
+  // Both edges present.
+  assert(graph.vertex_degree()[0] == 1);
+  assert(graph.vertex_degree()[1] == 2);
+  assert(graph.vertex_degree()[2] == 2);
+  assert(graph.vertex_degree()[3] == 1);
+
+  // Only edge (1, 3, 2) left.
+  graph.RemoveEdge(0);
+  assert(graph.vertex_degree()[0] == 0);
+  assert(graph.vertex_degree()[1] == 1);
+  assert(graph.vertex_degree()[2] == 1);
+  assert(graph.vertex_degree()[3] == 1);
+
+  std::vector<TriGraph::Edge> extracted;
+  graph.ExtractEdgesAndClear(&extracted);
+}
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,8 +1,9 @@
 bin_PROGRAMS = cmph
+noinst_PROGRAMS = bm_numbers
 lib_LTLIBRARIES = libcmph.la
 include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h
 libcmph_la_SOURCES =  hash.h hash.c \
-		      jenkins_hash.h jenkins_hash.c\
+		      jenkins_hash.h jenkins_hash.c MurmurHash2.h\
 		      hash_state.h debug.h \
 		      vstack.h vstack.c vqueue.h vqueue.c\
 		      graph.h graph.c bitbool.h \
@@ -23,9 +24,14 @@ libcmph_la_SOURCES =  hash.h hash.c \
 		      select.h select.c select_lookup_tables.h \
 		      compressed_seq.h compressed_seq.c \
 		      compressed_rank.h compressed_rank.c \
+                      linear_string_map.h linear_string_map.c \
+		      cmph_benchmark.h cmph_benchmark.c \
 		      cmph_time.h

 libcmph_la_LDFLAGS = -version-info 0:0:0

 cmph_SOURCES = 	main.c wingetopt.h wingetopt.c
 cmph_LDADD = libcmph.la
+
+bm_numbers_SOURCES = bm_numbers.c
+bm_numbers_LDADD = libcmph.la
--- a/src/bdz.c
+++ b/src/bdz.c
@@ -9,7 +9,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include <string.h>
-//#define DEBUG
+// #define DEBUG
 #include "debug.h"
 #define UNASSIGNED 3U
 #define NULL_EDGE 0xffffffff
@@ -115,10 +115,12 @@ static void bdz_dump_graph(bdz_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32
 				graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
 	};
 	
+        #ifdef DEBUG
 	for(i=0;i<nvertices;i++){
 		printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]);
 	
 	};
+        #endif
 };

 static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
@@ -177,9 +179,14 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
 			}
 		};
 	};
+        DEBUGP("Queue head %d Queue tail %d\n", queue_head, queue_tail);
+        #ifdef DEBUG
+	bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
+        #endif
 	while(queue_tail!=queue_head){
 		curr_edge=queue[queue_tail++];
 		bdz_remove_edge(graph3,curr_edge);
+		DEBUGP("Removing edge %d\n", curr_edge);
 		v0=graph3->edges[curr_edge].vertices[0];
 		v1=graph3->edges[curr_edge].vertices[1];
 		v2=graph3->edges[curr_edge].vertices[2];
@@ -220,7 +227,7 @@ bdz_config_data_t *bdz_config_new(void)
 {
 	bdz_config_data_t *bdz;
 	bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
-	assert(bdz);
+        if (!bdz) return NULL;
 	memset(bdz, 0, sizeof(bdz_config_data_t));
 	bdz->hashfunc = CMPH_HASH_JENKINS;
 	bdz->g = NULL;
@@ -403,6 +410,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
 		h0 = hl[0] % bdz->r;
 		h1 = hl[1] % bdz->r + bdz->r;
 		h2 = hl[2] % bdz->r + (bdz->r << 1);
+                DEBUGP("Key: %.*s (%u %u %u)\n", keylen, key, h0, h1, h2);
 		mph->key_source->dispose(mph->key_source->data, key, keylen);
 		bdz_add_edge(graph3,h0,h1,h2);
 	}
@@ -427,7 +435,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
 		v0=graph3->edges[curr_edge].vertices[0];
 		v1=graph3->edges[curr_edge].vertices[1];
 		v2=graph3->edges[curr_edge].vertices[2];
-		DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
+		DEBUGP("B:%u %u %u -- %u %u %u edge %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2), curr_edge);
 		if(!GETBIT(marked_vertices, v0)){
 			if(!GETBIT(marked_vertices,v1))
 			{
@@ -572,7 +580,9 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint
 		base_rank += bdz_lookup_table[*(g + beg_idx_b++)];

 	}
+        DEBUGP("base rank %u\n", base_rank);
 	beg_idx_v = beg_idx_b << 2;
+        DEBUGP("beg_idx_v %u\n", beg_idx_v);
 	while(beg_idx_v < vertex)
 	{
 		if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
@@ -592,6 +602,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
 	hl[1] = hl[1] % bdz->r + bdz->r;
 	hl[2] = hl[2] % bdz->r + (bdz->r << 1);
 	vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
+        DEBUGP("Search found vertex %u\n", vertex);
 	return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
 }

--- a/src/bm_numbers.c
+++ b/src/bm_numbers.c
@@ -0,0 +1,130 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "bitbool.h"
+#include "cmph.h"
+#include "cmph_benchmark.h"
+#include "linear_string_map.h"
+
+// Generates a vector with `size` random 32 bits integers that are pairwise
+// distinct. Uniqueness is enforced with a bitmap indexed by v % dup_bits, so
+// a candidate is also rejected when it merely shares that remainder with an
+// earlier value. Returns NULL when `size` is too large for the bitmap or when
+// memory is exhausted; the caller releases the result with free().
+cmph_uint32* random_numbers_vector_new(cmph_uint32 size) {
+  const cmph_uint32 bits_per_number = (cmph_uint32)(sizeof(cmph_uint32) * 8);
+  cmph_uint32 i = 0;
+  cmph_uint32 dup_bits = 0;
+  char* dup = NULL;
+  cmph_uint32* vec = NULL;
+
+  // The bit count must fit in 32 bits, otherwise it would silently wrap.
+  if (size > ((cmph_uint32)-1) / bits_per_number) return NULL;
+  dup_bits = bits_per_number * size;
+
+  // "+ 1" keeps both requests non-empty when size is 0.
+  dup = (char*)calloc((size_t)dup_bits / 8 + 1, 1);
+  vec = (cmph_uint32*)malloc(sizeof(cmph_uint32) * ((size_t)size + 1));
+  if (!dup || !vec) {
+    free(dup);
+    free(vec);
+    return NULL;
+  }
+
+  for (i = 0; i < size; ++i) {
+    cmph_uint32 v = (cmph_uint32)random();
+    // The bit index is computed into a variable first so the bit macros
+    // receive a plain identifier instead of an expression.
+    cmph_uint32 bit = v % dup_bits;
+    while (GETBIT(dup, bit)) {
+      v = (cmph_uint32)random();
+      bit = v % dup_bits;
+    }
+    SETBIT(dup, bit);
+    vec[i] = v;
+  }
+  free(dup);
+  return vec;
+}
+
+// qsort/bsearch style comparator for cmph_uint32: returns -1, 0 or 1.
+// Subtracting the two unsigned values and converting to int would report the
+// wrong sign whenever they differ by more than INT_MAX.
+int cmph_uint32_cmp(const void *a, const void *b) {
+  const cmph_uint32 lhs = *(const cmph_uint32*)a;
+  const cmph_uint32 rhs = *(const cmph_uint32*)b;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+// Builds the map key "<algorithm name>:<iters>" for an (algo, iters) pair.
+// Returns a strdup()ed string owned by the caller, or NULL if strdup fails.
+char* create_lsmap_key(CMPH_ALGO algo, int iters) {
+  char mphf_name[128];
+  // %d matches the int argument (the previous %u did not).
+  snprintf(mphf_name, sizeof(mphf_name), "%s:%d", cmph_names[algo], iters);
+  return strdup(mphf_name);
+}
+
+static cmph_uint32 g_numbers_len = 0;  // number of entries in g_numbers
+static cmph_uint32 *g_numbers = NULL;  // random input keys, filled in by main()
+static lsmap_t *g_created_mphf = NULL;  // "<algo name>:<iters>" -> cmph_t* built by bm_create
+static lsmap_t *g_expected_probes = NULL;  // same key -> per-input-position probe counters from bm_search
+static lsmap_t *g_mphf_probes = NULL;  // same key -> per-hash-value probe counters from bm_search
+
+// Benchmark body: builds an mphf with algorithm `algo` over the first `iters`
+// numbers of g_numbers and stores it in g_created_mphf under the key produced
+// by create_lsmap_key(algo, iters). Exits the process on any failure.
+void bm_create(CMPH_ALGO algo, int iters) {
+  cmph_io_adapter_t* source = NULL;
+  cmph_config_t* config = NULL;
+  cmph_t* mphf = NULL;
+
+  // Reject negative counts explicitly instead of relying on the implicit
+  // conversion to unsigned in the comparison.
+  if (iters < 0 || (cmph_uint32)iters > g_numbers_len) {
+    fprintf(stderr, "No input with proper size.");
+    exit(-1);
+  }
+
+  source = cmph_io_struct_vector_adapter(
+      (void*)g_numbers, sizeof(cmph_uint32),
+      0, sizeof(cmph_uint32), (cmph_uint32)iters);
+  if (!source) {
+    fprintf(stderr, "Failed to create key source for algorithm %s\n",
+            cmph_names[algo]);
+    exit(-1);
+  }
+  config = cmph_config_new(source);
+  if (!config) {
+    fprintf(stderr, "Failed to create config for algorithm %s\n",
+            cmph_names[algo]);
+    exit(-1);
+  }
+  cmph_config_set_algo(config, algo);
+  mphf = cmph_new(config);
+  if (!mphf) {
+    fprintf(stderr, "Failed to create mphf for algorithm %s with %d keys",
+            cmph_names[algo], iters);
+    exit(-1);
+  }
+  cmph_config_destroy(config);
+  cmph_io_struct_vector_adapter_destroy(source);
+  // The map takes over both the freshly allocated key and the mphf.
+  lsmap_append(g_created_mphf, create_lsmap_key(algo, iters), mphf);
+}
+
+// Benchmark body: looks up the mphf stored by bm_create(algo, iters) and
+// probes it iters * 100 times with randomly chosen input numbers. The
+// per-position and per-hash-value probe counters are stored in
+// g_expected_probes and g_mphf_probes for a later verification step.
+// Exits the process when the mphf is missing or memory runs out.
+void bm_search(CMPH_ALGO algo, int iters) {
+  long long i = 0;
+  long long probes = 0;
+  char *mphf_name = NULL;
+  cmph_t* mphf = NULL;
+  cmph_uint32* count = NULL;
+  cmph_uint32* hash_count = NULL;
+
+  if (iters <= 0) {
+    fprintf(stderr, "Nothing to search with %d keys\n", iters);
+    exit(-1);
+  }
+
+  mphf_name = create_lsmap_key(algo, iters);
+  if (!mphf_name) {
+    fprintf(stderr, "Out of memory while building the mphf key\n");
+    exit(-1);
+  }
+  mphf = (cmph_t*)lsmap_search(g_created_mphf, mphf_name);
+  free(mphf_name);
+  if (!mphf) {
+    fprintf(stderr, "No mphf was created for algorithm %s with %d keys\n",
+            cmph_names[algo], iters);
+    exit(-1);
+  }
+
+  // calloc: the counters are incremented below and must start at zero.
+  count = (cmph_uint32*)calloc((size_t)iters, sizeof(cmph_uint32));
+  hash_count = (cmph_uint32*)calloc((size_t)iters, sizeof(cmph_uint32));
+  if (!count || !hash_count) {
+    fprintf(stderr, "Out of memory while allocating probe counters\n");
+    exit(-1);
+  }
+
+  // Widened so that iters * 100 cannot overflow int.
+  probes = (long long)iters * 100;
+  for (i = 0; i < probes; ++i) {
+    cmph_uint32 pos = (cmph_uint32)(random() % iters);
+    const char* buf = (const char*)(g_numbers + pos);
+    cmph_uint32 h = cmph_search(mphf, buf, sizeof(cmph_uint32));
+    ++count[pos];
+    // Guard the index: nothing here guarantees that h is below iters.
+    if (h < (cmph_uint32)iters) ++hash_count[h];
+  }
+
+  // Verify correctness later.
+  lsmap_append(g_expected_probes, create_lsmap_key(algo, iters), count);
+  lsmap_append(g_mphf_probes, create_lsmap_key(algo, iters), hash_count);
+}
+
+void verify() { }  // placeholder: the body is empty, so nothing is checked yet (presumably meant to compare the probe counters recorded by bm_search)
+
+#define DECLARE_ALGO(algo) /* defines bm_create_<algo>(iters) and bm_search_<algo>(iters), which forward to bm_create/bm_search with that algorithm */ \
+  void bm_create_ ## algo(int iters) { bm_create(algo, iters); } \
+  void bm_search_ ## algo(int iters) { bm_search(algo, iters); }
+
+DECLARE_ALGO(CMPH_BMZ);  // one create/search pair per algorithm, with the void (*)(int) shape that BM_REGISTER expects
+DECLARE_ALGO(CMPH_CHM);
+DECLARE_ALGO(CMPH_BRZ);
+DECLARE_ALGO(CMPH_FCH);
+DECLARE_ALGO(CMPH_BDZ);
+
+// lsmap_foreach_* callbacks with exactly the signatures the map expects, so
+// no function pointer has to be cast to an incompatible type.
+static void free_map_key(const char* key) { free((void*)key); }
+static void free_map_value(void* value) { free(value); }
+static void destroy_mphf(void* mphf) { cmph_destroy((cmph_t*)mphf); }
+
+// Frees every key of `map`, passes every value to `free_value`, then destroys
+// the map itself. A NULL map is ignored.
+static void release_map(lsmap_t* map, void (*free_value)(void*)) {
+  if (!map) return;
+  lsmap_foreach_key(map, free_map_key);
+  lsmap_foreach_value(map, free_value);
+  lsmap_destroy(map);
+}
+
+// Generates one million random keys, registers the create/search benchmarks
+// for the enabled algorithms, runs them and releases everything again.
+int main(int argc, char** argv) {
+  g_numbers_len = 1000 * 1000;
+  g_numbers = random_numbers_vector_new(g_numbers_len);
+  g_created_mphf = lsmap_new();
+  g_expected_probes = lsmap_new();
+  g_mphf_probes = lsmap_new();
+  if (!g_numbers || !g_created_mphf || !g_expected_probes || !g_mphf_probes) {
+    fprintf(stderr, "Failed to allocate benchmark state\n");
+    return -1;
+  }
+
+  BM_REGISTER(bm_create_CMPH_BMZ, 1000 * 1000);
+  BM_REGISTER(bm_search_CMPH_BMZ, 1000 * 1000);
+  BM_REGISTER(bm_create_CMPH_CHM, 1000 * 1000);
+  BM_REGISTER(bm_search_CMPH_CHM, 1000 * 1000);
+//  BM_REGISTER(bm_create_CMPH_BRZ, 1000 * 1000);
+//  BM_REGISTER(bm_search_CMPH_BRZ, 1000 * 1000);
+//  BM_REGISTER(bm_create_CMPH_FCH, 1000 * 1000);
+//  BM_REGISTER(bm_search_CMPH_FCH, 1000 * 1000);
+  BM_REGISTER(bm_create_CMPH_BDZ, 1000 * 1000);
+  BM_REGISTER(bm_search_CMPH_BDZ, 1000 * 1000);
+  run_benchmarks(argc, argv);
+
+  verify();
+  free(g_numbers);
+  release_map(g_created_mphf, destroy_mphf);
+  // The probe maps hold heap allocated counter arrays as values.
+  release_map(g_expected_probes, free_map_value);
+  release_map(g_mphf_probes, free_map_value);
+  return 0;
+}
--- a/src/bmz.c
+++ b/src/bmz.c
@@ -12,7 +12,7 @@
 #include <assert.h>
 #include <string.h>

-//#define DEBUG
+// #define DEBUG
 #include "debug.h"

 static int bmz_gen_edges(cmph_config_t *mph);
@@ -24,7 +24,7 @@ bmz_config_data_t *bmz_config_new(void)
 {
 	bmz_config_data_t *bmz = NULL;
 	bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t));
-	assert(bmz);
+        if (!bmz) return NULL;
 	memset(bmz, 0, sizeof(bmz_config_data_t));
 	bmz->hashfuncs[0] = CMPH_HASH_JENKINS;
 	bmz->hashfuncs[1] = CMPH_HASH_JENKINS;
@@ -162,13 +162,19 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
 	  }
 	  free(used_edges);
 	  free(visited);
-        }while(restart_mapping && iterations_map > 0);
+        } while(restart_mapping && iterations_map > 0);
 	graph_destroy(bmz->graph);
 	bmz->graph = NULL;
 	if (iterations_map == 0)
 	{
 		return NULL;
 	}
+        #ifdef DEBUG
+        fprintf(stderr, "G: ");
+        for (i = 0; i < bmz->n; ++i) fprintf(stderr, "%u ", bmz->g[i]);
+        fprintf(stderr, "\n");
+        #endif
+
 	mphf = (cmph_t *)malloc(sizeof(cmph_t));
 	mphf->algo = mph->algo;
 	bmzf = (bmz_data_t *)malloc(sizeof(bmz_data_t));
@@ -421,19 +427,18 @@ static int bmz_gen_edges(cmph_config_t *mph)
 		char *key = NULL;
 		mph->key_source->read(mph->key_source->data, &key, &keylen);

-//		if (key == NULL)fprintf(stderr, "key = %s -- read BMZ\n", key);
 		h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
 		h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
 		if (h1 == h2) if (++h2 >= bmz->n) h2 = 0;
+		DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
 		if (h1 == h2)
 		{
 			if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
 			mph->key_source->dispose(mph->key_source->data, key, keylen);
 			return 0;
 		}
-		//DEBUGP("Adding edge: %u -> %u for key %s\n", h1, h2, key);
+		DEBUGP("Adding edge: %u -> %u for key %.*s\n", h1, h2, keylen, key);
 		mph->key_source->dispose(mph->key_source->data, key, keylen);
-//		fprintf(stderr, "key = %s -- dispose BMZ\n", key);
 		multiple_edges = graph_contains_edge(bmz->graph, h1, h2);
 		if (mph->verbosity && multiple_edges) fprintf(stderr, "A non simple graph was generated\n");
 		if (multiple_edges) return 0; // checking multiple edge restriction.
@@ -524,9 +529,9 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
 	bmz_data_t *bmz = mphf->data;
 	cmph_uint32 h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
 	cmph_uint32 h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
-	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
+	DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
 	if (h1 == h2 && ++h2 > bmz->n) h2 = 0;
-	DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
+	DEBUGP("key: %.*s g[h1]: %u g[h2]: %u edges: %u\n", keylen, key, bmz->g[h1], bmz->g[h2], bmz->m);
 	return bmz->g[h1] + bmz->g[h2];
 }
 void bmz_destroy(cmph_t *mphf)
--- a/src/bmz8.c
+++ b/src/bmz8.c
@@ -23,7 +23,7 @@ bmz8_config_data_t *bmz8_config_new(void)
 {
 	bmz8_config_data_t *bmz8;
 	bmz8 = (bmz8_config_data_t *)malloc(sizeof(bmz8_config_data_t));
-	assert(bmz8);
+        if (!bmz8) return NULL;
 	memset(bmz8, 0, sizeof(bmz8_config_data_t));
 	bmz8->hashfuncs[0] = CMPH_HASH_JENKINS;
 	bmz8->hashfuncs[1] = CMPH_HASH_JENKINS;
--- a/src/brz.c
+++ b/src/brz.c
@@ -28,6 +28,7 @@ brz_config_data_t *brz_config_new(void)
 {
 	brz_config_data_t *brz = NULL;
 	brz = (brz_config_data_t *)malloc(sizeof(brz_config_data_t));
+        if (!brz) return NULL;
 	brz->algo = CMPH_FCH;
 	brz->b = 128;
 	brz->hashfuncs[0] = CMPH_HASH_JENKINS;
@@ -982,4 +983,3 @@ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
 		default: assert(0);
 	}
 }
-
--- a/src/buffer_entry.c
+++ b/src/buffer_entry.c
@@ -17,7 +17,7 @@ struct __buffer_entry_t
 buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
 {
 	buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t));
-	assert(buff_entry);
+        if (!buff_entry) return NULL;
 	buff_entry->fd = NULL;
 	buff_entry->buff = NULL;
 	buff_entry->capacity = capacity;
--- a/src/buffer_manage.c
+++ b/src/buffer_manage.c
@@ -16,7 +16,7 @@ buffer_manage_t * buffer_manage_new(cmph_uint32 memory_avail, cmph_uint32 nentri
 {
 	cmph_uint32 memory_avail_entry, i;
 	buffer_manage_t *buff_manage = (buffer_manage_t *)malloc(sizeof(buffer_manage_t));
-	assert(buff_manage);
+        if (!buff_manage) return NULL;
 	buff_manage->memory_avail = memory_avail;
 	buff_manage->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
 	buff_manage->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
--- a/src/buffer_manager.c
+++ b/src/buffer_manager.c
@@ -16,7 +16,7 @@ buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nent
 {
 	cmph_uint32 memory_avail_entry, i;
 	buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t));
-	assert(buff_manager);
+        if (!buff_manager) return NULL;
 	buff_manager->memory_avail = memory_avail;
 	buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
 	buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
--- a/src/chd.c
+++ b/src/chd.c
@@ -18,7 +18,7 @@ chd_config_data_t *chd_config_new(cmph_config_t *mph)
 	cmph_io_adapter_t *key_source = mph->key_source;
 	chd_config_data_t *chd;
 	chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t));
-	assert(chd);
+        if (!chd) return NULL;
 	memset(chd, 0, sizeof(chd_config_data_t));

 	chd->chd_ph = cmph_config_new(key_source);
@@ -268,5 +268,3 @@ cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
 	register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32);
 	return _chd_search(packed_chd_phf, ptr, key, keylen);
 }
-
-
--- a/src/chd_ph.c
+++ b/src/chd_ph.c
@@ -145,7 +145,7 @@ chd_ph_config_data_t *chd_ph_config_new(void)
 {
 	chd_ph_config_data_t *chd_ph;
 	chd_ph = (chd_ph_config_data_t *)malloc(sizeof(chd_ph_config_data_t));
-	assert(chd_ph);
+        if (!chd_ph) return NULL;
 	memset(chd_ph, 0, sizeof(chd_ph_config_data_t));

 	chd_ph->hashfunc = CMPH_HASH_JENKINS;
@@ -983,6 +983,3 @@ cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32
 	position = (cmph_uint32)((f + ((cmph_uint64 )h)*probe0_num + probe1_num) % n);
 	return position;
 }
-
-
-
--- a/src/chm.c
+++ b/src/chm.c
@@ -21,7 +21,7 @@ chm_config_data_t *chm_config_new(void)
 {
 	chm_config_data_t *chm = NULL;
 	chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t));
-	assert(chm);
+        if (!chm) return NULL;
 	memset(chm, 0, sizeof(chm_config_data_t));
 	chm->hashfuncs[0] = CMPH_HASH_JENKINS;
 	chm->hashfuncs[1] = CMPH_HASH_JENKINS;
--- a/src/cmph.c
+++ b/src/cmph.c
@@ -13,7 +13,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
-//#define DEBUG
+// #define DEBUG
 #include "debug.h"

 const char *cmph_names[] = {"bmz", "bmz8", "chm", "brz", "fch", "bdz", "bdz_ph", "chd_ph", "chd", NULL };
@@ -158,6 +158,10 @@ static cmph_uint32 count_nlfile_keys(FILE *fd)
 		char buf[BUFSIZ];
 		ptr = fgets(buf, BUFSIZ, fd);
 		if (feof(fd)) break;
+                if (ferror(fd) || ptr == NULL) {
+                  perror("Error reading input file");
+                  return 0;
+                }
 		if (buf[strlen(buf) - 1] != '\n') continue;
 		++count;
 	}
--- a/src/cmph_benchmark.c
+++ b/src/cmph_benchmark.c
@@ -0,0 +1,129 @@
+// A simple benchmark tool around getrusage
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+
+#include "cmph_benchmark.h"
+
+typedef struct {
+  const char* name;  // lookup key; NULL marks the terminating entry of the array
+  void (*func)(int);  // benchmark body, called with `iters`
+  int iters;  // argument handed to func
+  struct rusage begin;  // getrusage() snapshot taken in bm_start
+  struct rusage end;  // getrusage() snapshot taken in bm_end
+} benchmark_t;
+
+static benchmark_t* global_benchmarks = NULL;  // realloc-grown array ending with a zeroed entry; NULL until the first registration
+
+/* Subtract the `struct timeval' values X and Y,
+   storing the result in RESULT.
+   Return 1 if the difference is negative, otherwise 0.
+   Neither X nor Y is modified.  */
+
+int timeval_subtract(
+    struct timeval *result, struct timeval *x, struct timeval* y) {
+  /* Perform the carry on a private copy of Y so that the caller's value
+     (e.g. a stored start timestamp) stays intact. */
+  struct timeval sub = *y;
+  int negative;
+
+  if (x->tv_usec < sub.tv_usec) {
+    int nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
+    sub.tv_usec -= 1000000 * nsec;
+    sub.tv_sec += nsec;
+  }
+  if (x->tv_usec - sub.tv_usec > 1000000) {
+    int nsec = (x->tv_usec - sub.tv_usec) / 1000000;
+    sub.tv_usec += 1000000 * nsec;
+    sub.tv_sec -= nsec;
+  }
+
+  /* Decide the sign before RESULT is written, in case it aliases X. */
+  negative = x->tv_sec < sub.tv_sec;
+
+  /* After the carry the microsecond difference is non-negative. */
+  result->tv_sec = x->tv_sec - sub.tv_sec;
+  result->tv_usec = x->tv_usec - sub.tv_usec;
+
+  return negative;
+}
+
+/* Returns the registered benchmark called `name`, or NULL if there is none
+   (including when nothing has been registered yet). */
+benchmark_t* find_benchmark(const char* name) {
+  benchmark_t* it;
+  if (global_benchmarks == NULL) return NULL;
+  for (it = global_benchmarks; it->name != NULL; ++it) {
+    if (strcmp(it->name, name) == 0) return it;
+  }
+  return NULL;
+}
+
+/* Counts the registered benchmarks, i.e. the entries that precede the
+   NULL-named terminator; 0 when the array has not been allocated. */
+int global_benchmarks_length() {
+  const benchmark_t* it = global_benchmarks;
+  int count = 0;
+  if (it == NULL) return 0;
+  for (; it->name != NULL; ++it) ++count;
+  return count;
+}
+
+/* Appends a benchmark called `name` that will run func(iters). `name` is
+   stored by pointer, not copied, and must not be registered twice. The array
+   is grown by one entry and always ends with a zeroed terminator. Exits the
+   process if the array cannot be grown. */
+void bm_register(const char* name, void (*func)(int), int iters) {
+  benchmark_t* grown;
+  int length = global_benchmarks_length();
+  assert(!find_benchmark(name));
+  /* Keep the old pointer until realloc is known to have succeeded. */
+  grown = realloc(global_benchmarks, ((size_t)length + 2) * sizeof(benchmark_t));
+  if (!grown) {
+    perror("bm_register: realloc failed");
+    exit(-1);
+  }
+  global_benchmarks = grown;
+  /* Zero the new entry and the terminator that follows it. */
+  memset(&global_benchmarks[length], 0, 2 * sizeof(benchmark_t));
+  global_benchmarks[length].name = name;
+  global_benchmarks[length].func = func;
+  global_benchmarks[length].iters = iters;
+}
+
+/* Records the getrusage() snapshot that opens benchmark `name`, then runs the
+   benchmark body with its registered iteration count. */
+void bm_start(const char* name) {
+  struct rusage usage;
+  benchmark_t* bm = find_benchmark(name);
+  assert(bm);
+  if (getrusage(RUSAGE_SELF, &usage) != 0) {
+    perror("rusage failed");    
+    exit(-1);
+  }
+  bm->begin = usage;
+  bm->func(bm->iters);
+}
+
+/* Takes the closing getrusage() snapshot for benchmark `name` and prints the
+   user and system CPU time consumed since the matching bm_start(). Exits the
+   process if getrusage fails or `name` was never registered. */
+void bm_end(const char* name) {
+  benchmark_t* benchmark;
+  struct rusage rs;
+  struct timeval utime;
+  struct timeval stime;
+
+  /* Sample first so the lookup below is not part of the measurement. */
+  int ret = getrusage(RUSAGE_SELF, &rs);
+  if (ret != 0) {
+    perror("rusage failed");    
+    exit(-1);
+  }
+
+  benchmark = find_benchmark(name);
+  if (!benchmark) {
+    fprintf(stderr, "Unknown benchmark %s\n", name);
+    exit(-1);
+  }
+  benchmark->end = rs;
+
+  timeval_subtract(&utime, &benchmark->end.ru_utime, &benchmark->begin.ru_utime);
+  timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime);
+
+  /* tv_sec is a time_t; cast it so the argument really matches %ld. */
+  printf("Benchmark: %s\n", benchmark->name);
+  printf("User time used  : %ld.%06ld\n",
+         (long int)utime.tv_sec, (long int)utime.tv_usec);
+  printf("System time used: %ld.%06ld\n",
+         (long int)stime.tv_sec, (long int)stime.tv_usec);
+  printf("\n");
+}
+ 
+/* Runs every registered benchmark once, in registration order, printing its
+   timings after each run. argc and argv are not used. */
+void run_benchmarks(int argc, char** argv) {
+  benchmark_t* bm;
+  if (global_benchmarks == NULL) return;
+  for (bm = global_benchmarks; bm->name != NULL; ++bm) {
+    bm_start(bm->name);
+    bm_end(bm->name);
+  }
+}
+
--- a/src/cmph_benchmark.h
+++ b/src/cmph_benchmark.h
@@ -0,0 +1,20 @@
+#ifndef __CMPH_BENCHMARK_H__
+#define __CMPH_BENCHMARK_H__
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+  
+#define BM_REGISTER(func, iters) bm_register(#func, func, iters)  /* registers func using its own identifier as the benchmark name */
+void bm_register(const char* name, void (*func)(int), int iters);  /* adds a benchmark; func is later called as func(iters) */
+void run_benchmarks(int argc, char** argv);  /* runs every registered benchmark in registration order and prints its user/system time */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __CMPH_BENCHMARK_H__
--- a/src/cmph_structs.c
+++ b/src/cmph_structs.c
@@ -65,5 +65,3 @@ cmph_t *__cmph_load(FILE *f)

 	return mphf;
 }
-
-
--- a/src/djb2_hash.c
+++ b/src/djb2_hash.c
@@ -4,6 +4,7 @@
 djb2_state_t *djb2_state_new()
 {
 	djb2_state_t *state = (djb2_state_t *)malloc(sizeof(djb2_state_t));
+        if (!state) return NULL;  /* allocation failed: report it to the caller instead of dereferencing NULL */
 	state->hashfunc = CMPH_HASH_DJB2;
 	return state;
 }
--- a/src/fch.c
+++ b/src/fch.c
@@ -23,7 +23,7 @@ fch_config_data_t *fch_config_new()
 {
 	fch_config_data_t *fch;
 	fch = (fch_config_data_t *)malloc(sizeof(fch_config_data_t));
-	assert(fch);
+        if (!fch) return NULL;
 	memset(fch, 0, sizeof(fch_config_data_t));
 	fch->hashfuncs[0] = CMPH_HASH_JENKINS;
 	fch->hashfuncs[1] = CMPH_HASH_JENKINS;
@@ -514,4 +514,3 @@ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
 	h1 = mixh10h11h12 (b, p1, p2, h1);
 	return (h2 + g_ptr[h1]) % m;
 }
-
--- a/src/fch_buckets.c
+++ b/src/fch_buckets.c
@@ -116,7 +116,7 @@ fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets)
 {
 	cmph_uint32 i;
 	fch_buckets_t *buckets = (fch_buckets_t *)malloc(sizeof(fch_buckets_t));
-	assert(buckets);
+        if (!buckets) return NULL;
 	buckets->values = (fch_bucket_t *)calloc((size_t)nbuckets, sizeof(fch_bucket_t));
 	for (i = 0; i < nbuckets; i++) fch_bucket_new(buckets->values + i);
 	assert(buckets->values);
--- a/src/fnv_hash.c
+++ b/src/fnv_hash.c
@@ -4,6 +4,7 @@
 fnv_state_t *fnv_state_new()
 {
 	fnv_state_t *state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
+        if (!state) return NULL;  /* out of memory: return NULL rather than writing through a null pointer */
 	state->hashfunc = CMPH_HASH_FNV;
 	return state;
 }
@@ -41,6 +42,7 @@ void fnv_state_dump(fnv_state_t *state, char **buf, cmph_uint32 *buflen)
 fnv_state_t * fnv_state_copy(fnv_state_t *src_state)
 {
 	fnv_state_t *dest_state = (fnv_state_t *)malloc(sizeof(fnv_state_t));
+        if (!dest_state) return NULL;  /* out of memory: no copy is produced and src_state is untouched */
 	dest_state->hashfunc = src_state->hashfunc;
 	return dest_state;
 }
--- a/src/graph.c
+++ b/src/graph.c
@@ -8,7 +8,7 @@
 #include "vstack.h"
 #include "bitbool.h"

-//#define DEBUG
+// #define DEBUG
 #include "debug.h"

 /* static const cmph_uint8 bitmask[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; */
@@ -176,7 +176,7 @@ static cmph_uint8 find_degree1_edge(graph_t *g, cmph_uint32 v, cmph_uint8 *delet
 {
 	cmph_uint32 edge = g->first[v];
 	cmph_uint8 found = 0;
-	DEBUGP("Checking degree of vertex %u\n", v);
+	DEBUGP("Checking degree of vertex %u connected to edge %u\n", v, edge);
 	if (edge == EMPTY) return 0;
 	else if (!(GETBIT(deleted, abs_edge(edge, 0))))
 	{
@@ -334,5 +334,3 @@ cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it)
 	it->edge = g->next[it->edge];
 	return ret;
 }
-	
-
--- a/src/jenkins_hash.c
+++ b/src/jenkins_hash.c
@@ -87,6 +87,7 @@ acceptable.  Do NOT use for cryptographic purposes.
 jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
 {
 	jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
+        if (!state) return NULL;  /* out of memory: return before the seed is written through the pointer */
 	DEBUGP("Initializing jenkins hash\n");
 	state->seed = ((cmph_uint32)rand() % size);
 	return state;
 }
+++ b/src/linear_string_map.c
@@ -0,0 +1,68 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "linear_string_map.h"
+
+struct __linear_string_map_t {
+  const char *key;  // not copied; the map stores the caller's pointer
+  void *value;  // opaque payload, never dereferenced by the map
+  struct __linear_string_map_t* next;  // NULL only on the trailing dummy node
+};
+
+// Creates an empty map, which is a single dummy node whose next is NULL.
+// Returns NULL if that node cannot be allocated.
+lsmap_t *lsmap_new() {
+  lsmap_t* head = (lsmap_t*)malloc(sizeof(lsmap_t));
+  if (head != NULL) {
+    head->key = "dummy node";
+    head->next = NULL;
+  }
+  return head;
+}
+
+// Returns the number of entries in the map, not counting the trailing dummy
+// node.
+int lsmap_size(lsmap_t *lsmap) {
+  int size = 0;
+  // Advance through the list; without this step the loop would never end on
+  // a non-empty map.
+  for (; lsmap->next != NULL; lsmap = lsmap->next) ++size;
+  return size;
+}
+
+// Appends (key, value) at the end of the map. The current dummy node receives
+// the entry and a fresh dummy node is linked behind it. Neither key nor value
+// is copied. If the new node cannot be allocated the map is left unchanged
+// and the failure is reported on stderr.
+void lsmap_append(lsmap_t *lsmap, const char *key, void *value) {
+  // Allocate first, so a failure cannot leave a half-updated list behind.
+  lsmap_t* tail = (lsmap_t*)malloc(sizeof(lsmap_t));
+  if (!tail) {
+    fprintf(stderr, "lsmap_append: out of memory, entry dropped\n");
+    return;
+  }
+  tail->key = "dummy node";
+  tail->value = NULL;
+  tail->next = NULL;
+
+  while (lsmap->next != NULL) lsmap = lsmap->next;
+  lsmap->key = key;
+  lsmap->value = value;
+  lsmap->next = tail;
+}
+
+// Returns the value of the first entry whose key compares equal to `key`
+// with strcmp, or NULL when no entry matches.
+void* lsmap_search(lsmap_t *lsmap, const char *key) {
+  lsmap_t* node;
+  for (node = lsmap; node->next != NULL; node = node->next) {
+    if (strcmp(node->key, key) != 0) continue;
+    return node->value;
+  }
+  return NULL;
+}
+
+// Calls f on the key of every entry, in insertion order. The trailing dummy
+// node is skipped.
+void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) {
+  lsmap_t* node;
+  for (node = lsmap; node->next != NULL; node = node->next) {
+    f(node->key);
+  }
+}
+
+// Calls f on the value of every entry, in insertion order. The trailing
+// dummy node is skipped.
+void lsmap_foreach_value(lsmap_t *lsmap, void (*f)(void*)) {
+  lsmap_t* node;
+  for (node = lsmap; node->next != NULL; node = node->next) {
+    f(node->value);
+  }
+}
+
+// Frees every node of the map, including the trailing dummy node. Keys and
+// values are not freed.
+void lsmap_destroy(lsmap_t *lsmap) {
+  lsmap_t* node = lsmap;
+  for (;;) {
+    lsmap_t* following = node->next;
+    free(node);
+    if (following == NULL) break;
+    node = following;
+  }
+}
--- a/src/linear_string_map.h
+++ b/src/linear_string_map.h
@@ -0,0 +1,13 @@
+// A simple linked list based dynamic sized associative map from const char* to
+// void*. Designed to maximize ease of use instead of performance. Should be
+// used in benchmarks and tests only, not to be distributed with the cmph
+// runtime headers.
+
+typedef struct __linear_string_map_t lsmap_t;
+
+lsmap_t *lsmap_new();
+void lsmap_append(lsmap_t *lsmap, const char *key, void *value);
+void* lsmap_search(lsmap_t *lsmap, const char *key);
+void lsmap_foreach_key(lsmap_t* lsmap, void (*f)(const char*));
+void lsmap_foreach_value(lsmap_t* lsmap, void (*f)(void*));
+void lsmap_destroy(lsmap_t* lsmap);
--- a/src/sdbm_hash.c
+++ b/src/sdbm_hash.c
@@ -4,6 +4,7 @@
 sdbm_state_t *sdbm_state_new()
 {
 	sdbm_state_t *state = (sdbm_state_t *)malloc(sizeof(sdbm_state_t));
+        if (!state) return NULL;  /* out of memory: return NULL rather than writing through a null pointer */
 	state->hashfunc = CMPH_HASH_SDBM;
 	return state;
 }
--- a/src/vqueue.c
+++ b/src/vqueue.c
@@ -12,7 +12,7 @@ vqueue_t * vqueue_new(cmph_uint32 capacity)
 {
  size_t capacity_plus_one = capacity + 1;
  vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
-  assert(q);
+  if (!q) return NULL;
  q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32));
  q->beg = q->end = 0;
  q->capacity = (cmph_uint32) capacity_plus_one;
--- a/src/vstack.c
+++ b/src/vstack.c
@@ -76,4 +76,3 @@ void vstack_reserve(vstack_t *stack, cmph_uint32 size)
 		DEBUGP("Increased\n");
 	}
 }
-		
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,4 +1,6 @@
-noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests select_tests compressed_seq_tests compressed_rank_tests
+TESTS = $(check_PROGRAMS)
+check_PROGRAMS = graph_tests select_tests compressed_seq_tests compressed_rank_tests cmph_benchmark_test
+noinst_PROGRAMS = packed_mphf_tests mphf_tests

 INCLUDES = -I../src/

@@ -19,3 +21,6 @@ compressed_seq_tests_LDADD = ../src/libcmph.la

 compressed_rank_tests_SOURCES = compressed_rank_tests.c
 compressed_rank_tests_LDADD = ../src/libcmph.la
+
+cmph_benchmark_test_SOURCES = cmph_benchmark_test.c
+cmph_benchmark_test_LDADD = ../src/libcmph.la
--- a/tests/cmph_benchmark_test.c
+++ b/tests/cmph_benchmark_test.c
@@ -0,0 +1,23 @@
+#include <unistd.h>  // for sleep
+#include <limits.h>
+
+#include "cmph_benchmark.h"
+
+// Benchmark body that blocks in sleep(1); `iters` is ignored.
+void bm_sleep(int iters) {
+  (void)iters;
+  sleep(1);
+}
+
+// Benchmark body that adds up the integers below INT_MAX; `iters` is
+// ignored.
+void bm_increment(int iters) {
+  int i;
+  // Unsigned accumulator: the running sum exceeds INT_MAX, and overflowing a
+  // signed int is undefined behavior, whereas unsigned arithmetic wraps.
+  unsigned int v = 0;
+  (void)iters;
+  for (i = 0; i < INT_MAX; ++i) {
+    v += (unsigned int)i;
+  }
+  (void)v;
+}
+
+// Registers the two sample benchmarks, each with an iteration count of 1,
+// and runs them through the benchmark driver.
+int main(int argc, char** argv) {
+  BM_REGISTER(bm_sleep, 1);
+  BM_REGISTER(bm_increment, 1);
+
+  run_benchmarks(argc, argv);
+
+  return 0;
+}
+}
+