From d5b579fbd66e23aed538419f544c5b3068545fc5 Mon Sep 17 00:00:00 2001
From: Davi de Castro Reis <davi.reis@gmail.com>
Date: Sun, 3 Jun 2012 03:13:06 -0300
Subject: [PATCH 1/2] Generalized mph_map for trade-offs.

---
 configure.ac       |  2 +-
 cxxmph/bm_map.cc   | 22 ++++++++++-----
 cxxmph/mph_index.h | 68 +++++++++++++++++++++++++++++++++++++++-------
 cxxmph/mph_map.h   | 66 ++++++++++++++++++++++++++------------------
 4 files changed, 114 insertions(+), 44 deletions(-)
diff --git a/configure.ac b/configure.ac
index a149229..6cb34c3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -34,7 +34,7 @@ LDFLAGS="$LIBM $LDFLAGS"
 CFLAGS="-Wall"
 
 AC_PROG_CXX
-CXXFLAGS="-Wall -Wno-unused-function -DNDEBUG -O3 -fomit-frame-pointer $CXXFLAGS"
+CXXFLAGS="$CXXFLAGS -Wall -Wno-unused-function -DNDEBUG -O3 -fomit-frame-pointer"
 AC_ENABLE_CXXMPH
 if test x$cxxmph = xtrue; then
   AC_COMPILE_STDCXX_0X
diff --git a/cxxmph/bm_map.cc b/cxxmph/bm_map.cc
index f777a2b..4dd071c 100644
--- a/cxxmph/bm_map.cc
+++ b/cxxmph/bm_map.cc
@@ -4,16 +4,13 @@
 #include "bm_common.h"
 #include "mph_map.h"
 
-using cxxmph::mph_map;
 using std::string;
-using std::unordered_map;
 
 // Another reference benchmark:
 // http://blog.aggregateknowledge.com/tag/bigmemory/
 
 namespace cxxmph {
 
-
 template <class MapType, class T>
 const T* myfind(const MapType& mymap, const T& k) {
   auto it = mymap.find(k);
@@ -100,13 +97,24 @@ using namespace cxxmph;
 
 int main(int argc, char** argv) {
   srandom(4);
+  Benchmark::Register(new BM_CreateUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k"));
+  Benchmark::Register(new BM_CreateUrls<std::unordered_map<StringPiece, StringPiece>>("URLS100k"));
   Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
-  Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
+  Benchmark::Register(new BM_CreateUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k"));
+
+  Benchmark::Register(new BM_SearchUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
+  Benchmark::Register(new BM_SearchUrls<std::unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
   Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
-  Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
+  Benchmark::Register(new BM_SearchUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
+
+  Benchmark::Register(new BM_SearchUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
+  Benchmark::Register(new BM_SearchUrls<std::unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
   Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
-  Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
+  Benchmark::Register(new BM_SearchUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
+
+  Benchmark::Register(new BM_SearchUint64<dense_hash_map<uint64_t, uint64_t>>);
+  Benchmark::Register(new BM_SearchUint64<std::unordered_map<uint64_t, uint64_t>>);
   Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
-  Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
+  Benchmark::Register(new BM_SearchUint64<sparse_hash_map<uint64_t, uint64_t>>);
   Benchmark::RunAll();
 }
diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h
index 1f6ace1..57e9e64 100644
--- a/cxxmph/mph_index.h
+++ b/cxxmph/mph_index.h
@@ -48,8 +48,8 @@ namespace cxxmph {
 
 class MPHIndex {
  public:
-  MPHIndex(double c = 1.23, uint8_t b = 7) :
-      c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
+  MPHIndex(bool square = false, double c = 1.23, uint8_t b = 7) :
+      c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1),
       ranktable_(NULL), ranktable_size_(0) { }
   ~MPHIndex();
 
@@ -66,6 +66,8 @@ class MPHIndex {
   uint32_t perfect_hash_size() const { return n_; } 
   template <class SeededHashFcn, class Key>  // must agree with Reset
   uint32_t perfect_hash(const Key& x) const;  // way faster than the minimal
+  template <class SeededHashFcn, class Key>  // must agree with Reset
+  uint32_t perfect_square(const Key& x) const;  // even faster but needs square=true
   uint32_t minimal_perfect_hash_size() const { return size(); }
   template <class SeededHashFcn, class Key>  // must agree with Reset
   uint32_t minimal_perfect_hash(const Key& x) const;
@@ -93,6 +95,7 @@ class MPHIndex {
   uint32_t m_;  // edges count
   uint32_t n_;  // vertex count
   uint32_t k_;  // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
+  bool square_;  // make bit vector size a power of 2
 
   // Values used during search
 
@@ -124,7 +127,7 @@ bool MPHIndex::Reset(
   if ((r_ % 2) == 0) r_ += 1;
   // This can be used to speed mods, but increases occupation too much. 
   // Needs to try http://gmplib.org/manual/Integer-Exponentiation.html instead
-  // r_ = nextpoweroftwo(r_);
+  if (square_) r_ = nextpoweroftwo(r_);
   nest_displacement_[0] = 0;
   nest_displacement_[1] = r_;
   nest_displacement_[2] = (r_ << 1);
@@ -173,6 +176,21 @@ bool MPHIndex::Mapping(
   return false;
 }
 
+template <class SeededHashFcn, class Key>
+uint32_t MPHIndex::perfect_square(const Key& key) const {
+  if (!g_.size()) return 0;
+  h128 h = SeededHashFcn().hash128(key, hash_seed_[0]);
+  h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
+  h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
+  h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
+  assert((h[0]) < g_.size());
+  assert((h[1]) < g_.size());
+  assert((h[2]) < g_.size());
+  uint8_t nest = threebit_mod3[g_[h[0]] + g_[h[1]] + g_[h[2]]];
+  uint32_t vertex = h[nest];
+  return vertex;
+}
+
 template <class SeededHashFcn, class Key>
 uint32_t MPHIndex::perfect_hash(const Key& key) const {
   if (!g_.size()) return 0;
@@ -180,17 +198,14 @@ uint32_t MPHIndex::perfect_hash(const Key& key) const {
   h[0] = (h[0] % r_) + nest_displacement_[0];
   h[1] = (h[1] % r_) + nest_displacement_[1];
   h[2] = (h[2] % r_) + nest_displacement_[2];
-  // h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
-  // h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
-  // h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
   assert((h[0]) < g_.size());
   assert((h[1]) < g_.size());
   assert((h[2]) < g_.size());
-  uint8_t nest = threebit_mod3[
-      g_[h[0]] + g_[h[1]] + g_[h[2]]];
+  uint8_t nest = threebit_mod3[g_[h[0]] + g_[h[1]] + g_[h[2]]];
   uint32_t vertex = h[nest];
   return vertex;
 }
+
 template <class SeededHashFcn, class Key>
 uint32_t MPHIndex::minimal_perfect_hash(const Key& key) const {
   return Rank(perfect_hash<SeededHashFcn, Key>(key));
@@ -206,15 +221,48 @@ uint32_t MPHIndex::index(const Key& key) const {
 template <class Key, class HashFcn = typename seeded_hash<std::hash<Key>>::hash_function>
 class SimpleMPHIndex : public MPHIndex {
  public:
+  SimpleMPHIndex(bool advanced_usage = false) : MPHIndex(advanced_usage) {}
   template <class ForwardIterator>
   bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size) {
     return MPHIndex::Reset<HashFcn>(begin, end, size);
   }
   uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
-  uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash<HashFcn>(key); }
-  uint32_t minimal_perfect_hash(const Key& key) const { return MPHIndex::minimal_perfect_hash<HashFcn>(key); }
 };
 
+// The parameters minimal and square trade memory usage for evaluation speed.
+// Minimal decreases speed and memory usage, and square does the opposite.
+// Using minimal=true and square=false is the same as SimpleMPHIndex.
+template <bool minimal, bool square, class Key, class HashFcn>
+struct FlexibleMPHIndex {};
+
+template <class Key, class HashFcn>
+struct FlexibleMPHIndex<true, false, Key, HashFcn> 
+    : public SimpleMPHIndex<Key, HashFcn> {
+  FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(false) {}
+  uint32_t index(const Key& key) const {
+      return MPHIndex::minimal_perfect_hash<HashFcn>(key); }
+  uint32_t size() const { return MPHIndex::minimal_perfect_hash_size(); }
+};
+template <class Key, class HashFcn>
+struct FlexibleMPHIndex<false, true, Key, HashFcn> 
+    : public SimpleMPHIndex<Key, HashFcn> {
+  FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(true) {}
+  uint32_t index(const Key& key) const {
+      return MPHIndex::perfect_square<HashFcn>(key); }
+  uint32_t size() const { return MPHIndex::perfect_hash_size(); }
+};
+template <class Key, class HashFcn>
+struct FlexibleMPHIndex<false, false, Key, HashFcn> 
+    : public SimpleMPHIndex<Key, HashFcn> {
+  FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(false) {}
+  uint32_t index(const Key& key) const {
+      return MPHIndex::index<HashFcn>(key); }
+  uint32_t size() const { return MPHIndex::perfect_hash_size(); }
+};
+// From a trade-off perspective this case does not make much sense.
+// template <class Key, class HashFcn>
+// class FlexibleMPHIndex<true, true, Key, HashFcn>
+
 }  // namespace cxxmph
 
 #endif // __CXXMPH_MPH_INDEX_H__
diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h
index 6563232..3fc83d0 100644
--- a/cxxmph/mph_map.h
+++ b/cxxmph/mph_map.h
@@ -5,12 +5,12 @@
 //
 // This class not necessarily faster than unordered_map (or ext/hash_map).
 // Benchmark your code before using it. If you do not call rehash() before
-// starting your reads, it will be definitively slower than unordered_map.
+// starting your reads, it will be very likely slower than unordered_map.
 //
-// For large sets of urls, which are a somewhat expensive to compare, I found
-// this class to be about 10% faster than unordered_map.
+// For large sets of urls (>100k), which are a somewhat expensive to compare, I
+// found this class to be about 10%-30% faster than unordered_map.
 //
-// The space overhead of this map is 1.93 bits per bucket and it achieves 100%
+// The space overhead of this map is 2.6 bits per bucket and it achieves 100%
 // occupation with a rehash call.
 
 #include <algorithm>
@@ -30,17 +30,18 @@ namespace cxxmph {
 
 using std::pair;
 using std::make_pair;
-using std::unordered_map;
 using std::vector;
 
 // Save on repetitive typing.
-#define MPH_MAP_TMPL_SPEC template <class Key, class Data, class HashFcn, class EqualKey, class Alloc>
-#define MPH_MAP_CLASS_SPEC mph_map<Key, Data, HashFcn, EqualKey, Alloc>
+#define MPH_MAP_TMPL_SPEC  \
+    template <bool minimal, bool square, \
+    class Key, class Data, class HashFcn, class EqualKey, class Alloc>
+#define MPH_MAP_CLASS_SPEC mph_map_base<minimal, square, Key, Data, HashFcn, EqualKey, Alloc>
 #define MPH_MAP_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
 #define MPH_MAP_INLINE_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC inline typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
 
-template <class Key, class Data, class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>, class Alloc = std::allocator<Data> >
-class mph_map {
+template <bool minimal, bool square, class Key, class Data, class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>, class Alloc = std::allocator<Data> >
+class mph_map_base {
  public:
   typedef Key key_type;
   typedef Data data_type;
@@ -63,8 +64,8 @@ class mph_map {
   typedef bool bool_type;
   typedef pair<iterator, bool> insert_return_type;
 
-  mph_map();
-  ~mph_map();
+  mph_map_base();
+  ~mph_map_base();
 
   iterator begin();
   iterator end();
@@ -83,7 +84,7 @@ class mph_map {
   data_type& operator[](const key_type &k);
   const data_type& operator[](const key_type &k) const;
 
-  size_type bucket_count() const { return index_.minimal_perfect_hash_size() + slack_.bucket_count(); }
+  size_type bucket_count() const { return index_.size() + slack_.bucket_count(); }
   void rehash(size_type nbuckets /*ignored*/); 
 
  protected:  // mimicking STL implementation
@@ -106,9 +107,9 @@ class mph_map {
    void pack();
    vector<value_type> values_;
    vector<bool> present_;
-   SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
+   FlexibleMPHIndex<minimal, square, Key, typename seeded_hash<HashFcn>::hash_function> index_;
    // TODO(davi) optimize slack to use hash from index rather than calculate its own
-   typedef unordered_map<h128, uint32_t, h128::hash32> slack_type;
+   typedef std::unordered_map<h128, uint32_t, h128::hash32> slack_type;
    slack_type slack_;
    size_type size_;
    typename seeded_hash<HashFcn>::hash_function hasher128_;
@@ -119,13 +120,11 @@ bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
   return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
 }
 
-MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
+MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map_base() : size_(0) {
   clear();
   pack();
 }
-
-MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
-}
+MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map_base() { }
 
 MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
   auto it = find(x.first);
@@ -154,13 +153,13 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
       make_iterator_first(begin()),
       make_iterator_first(end()), size_);
   if (!success) { exit(-1); }
-  vector<value_type> new_values(index_.minimal_perfect_hash_size());
+  vector<value_type> new_values(index_.size());
   new_values.reserve(new_values.size() * 2);
-  vector<bool> new_present(index_.minimal_perfect_hash_size(), false);
+  vector<bool> new_present(index_.size(), false);
   new_present.reserve(new_present.size() * 2);
   for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
-    size_type id = index_.minimal_perfect_hash(it->first);
-    assert(id < index_.minimal_perfect_hash_size());
+    size_type id = index_.index(it->first);
+    assert(id < index_.size());
     assert(id < new_values.size());
     new_values[id] = *it;
     new_present[id] = true;
@@ -216,10 +215,10 @@ MPH_MAP_INLINE_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
      auto sit = slack_.find(hasher128_.hash128(k, 0));
      if (sit != slack_.end()) return sit->second;
   }
-  if (__builtin_expect(index_.minimal_perfect_hash_size(), 1)) {
-    auto minimal_perfect_hash = index_.minimal_perfect_hash(k);
-    if (__builtin_expect(present_[minimal_perfect_hash], true)) { 
-      return minimal_perfect_hash;
+  if (__builtin_expect(index_.size(), 1)) {
+    auto id = index_.index(k);
+    if (__builtin_expect(present_[id], true)) { 
+      return id;
     }
   }
   return -1;
@@ -235,6 +234,21 @@ MPH_MAP_METHOD_DECL(void_type, rehash)(size_type nbuckets) {
   slack_type().swap(slack_);
 }
 
+#define MPH_MAP_PREAMBLE template <class Key, class Data,\
+     class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>,\
+     class Alloc = std::allocator<Data> >
+
+MPH_MAP_PREAMBLE class mph_map : public mph_map_base<
+     false, false, Key, Data, HashFcn, EqualKey, Alloc> {};
+MPH_MAP_PREAMBLE class unordered_map : public mph_map_base<
+     false, false, Key, Data, HashFcn, EqualKey, Alloc> {};
+MPH_MAP_PREAMBLE class hash_map : public mph_map_base<
+     false, false, Key, Data, HashFcn, EqualKey, Alloc> {};
+
+MPH_MAP_PREAMBLE class dense_hash_map : public mph_map_base<
+     false, true, Key, Data, HashFcn, EqualKey, Alloc> {};
+MPH_MAP_PREAMBLE class sparse_hash_map : public mph_map_base<
+     true, false, Key, Data, HashFcn, EqualKey, Alloc> {};
 
 }  // namespace cxxmph
 

From cc42ab3b741c81b49b309618572b6389389a9945 Mon Sep 17 00:00:00 2001
From: Davi de Castro Reis <davi.reis@gmail.com>
Date: Sun, 3 Jun 2012 04:17:14 -0300
Subject: [PATCH 2/2] Pre-release comments.

---
 README             | 14 +++++++++++++-
 README.t2t         | 10 ++++++++++
 cxxmph/mph_index.h |  6 +-----
 cxxmph/mph_map.h   | 24 +++++++++++++++++-------
 4 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/README b/README
index e10e506..f3662a8 100644
--- a/README
+++ b/README
@@ -84,6 +84,18 @@ The CMPH Library encapsulates the newest and more efficient algorithms in an eas
 ----------------------------------------
 
 
+	News for version 2.0
+	====================
+
+Cleaned up most warnings in the C code.
+
+Experimental C++ interface (--enable-cxxmph) implementing the BDZ algorithm in
+a convenient SimpleMPHIndex interface, which serves as the basis
+for drop-in replacements for std::unordered_map, sparsehash::sparse_hash_map
+and sparsehash::dense_hash_map. Faster lookup time at the expense of insertion
+time. See cxxmph/mph_map.h and cxxmph/mph_index.h for details.
+
+
 	News for version 1.1
 	====================
 
@@ -310,5 +322,5 @@ Fabiano Cupertino Botelho (fc_botelho@users.sourceforge.net)
 
 Nivio Ziviani (nivio@dcc.ufmg.br)
 
-Last Updated: Fri Jun  1 19:04:40 2012
+Last Updated: Sun Jun  3 04:09:55 2012
 
diff --git a/README.t2t b/README.t2t
index 21d851f..a57c2ef 100644
--- a/README.t2t
+++ b/README.t2t
@@ -88,6 +88,16 @@ The CMPH Library encapsulates the newest and more efficient algorithms in an eas
 
 ----------------------------------------
 
+==News for version 2.0==
+
+Cleaned up most warnings in the C code.
+
+Experimental C++ interface (--enable-cxxmph) implementing the BDZ algorithm in
+a convenient interface, which serves as the basis
+for drop-in replacements for std::unordered_map, sparsehash::sparse_hash_map
+and sparsehash::dense_hash_map. Potentially faster lookup time at the expense
+of insertion time. See cxxmph/mph_map.h and cxxmph/mph_index.h for details.
+
 ==News for version 1.1==
 
 Fixed a bug in the chd_pc algorithm and reorganized tests.
diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h
index 57e9e64..a99ec1e 100644
--- a/cxxmph/mph_index.h
+++ b/cxxmph/mph_index.h
@@ -10,16 +10,12 @@
 // This is a pretty uncommon data structure, and if you application has a real
 // use case for it, chances are that it is a real win. If all you are doing is
 // a straightforward implementation of an in-memory associative mapping data
-// structure, then it will probably be slower, since that the
-// evaluation of index() is typically slower than the total cost of running a
-// traditional hash function over a key and doing 2-3 conflict resolutions on
-// 100byte-ish strings. If you still want to do, take a look at mph_map.h
+// structure, then it will probably be slower. Take a look at mph_map.h
 // instead.
 //
 // Thesis presenting this and similar algorithms:
 // http://homepages.dcc.ufmg.br/~fbotelho/en/talks/thesis2008/thesis.pdf
 //
-//
 // Notes:
 //
 // Most users can use the SimpleMPHIndex wrapper instead of the MPHIndex which
diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h
index 3fc83d0..554fce4 100644
--- a/cxxmph/mph_map.h
+++ b/cxxmph/mph_map.h
@@ -3,15 +3,25 @@
 // Implementation of the unordered associative mapping interface using a
 // minimal perfect hash function.
 //
-// This class not necessarily faster than unordered_map (or ext/hash_map).
-// Benchmark your code before using it. If you do not call rehash() before
-// starting your reads, it will be very likely slower than unordered_map.
+// Since these are header-mostly libraries, make sure you compile your code
+// with -DNDEBUG and -O3. The code requires a modern C++11 compiler.
+//
+// The container comes in 3 flavors, all in the cxxmph namespace and drop-in
+// replacements for the popular classes with the same names.
+// * dense_hash_map
+//    -> fast, uses more memory, 2.93 bits per bucket, ~50% occupation
+// * unordered_map (aliases:  hash_map, mph_map)
+//    -> middle ground, uses 2.93 bits per bucket, ~81% occupation
+// * sparse_hash_map
+//    -> slower, uses 3.6 bits per bucket, 100% occupation
+//
+// Those classes are not necessarily faster than their existing counterparts.
+// Benchmark your code before using it. The larger the key, the larger the
+// number of elements inserted, and the bigger the number of failed searches,
+// the more likely those classes will outperform existing code.
 //
 // For large sets of urls (>100k), which are a somewhat expensive to compare, I
-// found this class to be about 10%-30% faster than unordered_map.
-//
-// The space overhead of this map is 2.6 bits per bucket and it achieves 100%
-// occupation with a rehash call.
+// found those classes to be about 10%-50% faster than unordered_map.
 
 #include <algorithm>
 #include <iostream>