From dbd4856faee98cb325053ca6fb8a2aca923ebe4c Mon Sep 17 00:00:00 2001 From: Davi Reis Date: Wed, 7 Mar 2012 01:48:20 -0500 Subject: [PATCH] Removed unnecessary seed mod which was breaking on presence of poor hash functions. --- cxxmph/bm_index.cc | 10 ++++------ cxxmph/bm_map.cc | 42 ++++++++++++++++++++---------------------- cxxmph/mph_index.h | 3 +-- cxxmph/mph_map.h | 1 - cxxmph/seeded_hash.h | 9 ++------- 5 files changed, 27 insertions(+), 38 deletions(-) diff --git a/cxxmph/bm_index.cc b/cxxmph/bm_index.cc index e700840..924231c 100644 --- a/cxxmph/bm_index.cc +++ b/cxxmph/bm_index.cc @@ -129,17 +129,15 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark { index.swap(index_); return true; } - std::unordered_map index_; + unordered_map index_; }; int main(int argc, char** argv) { -/* Benchmark::Register(new BM_MPHIndexCreate("URLS100k")); Benchmark::Register(new BM_STLIndexCreate("URLS100k")); - Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000)); - Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000)); -*/ - Benchmark::Register(new BM_CmphIndexSearch("URLS100k", 100*1000*1000)); + Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 10*1000*1000)); + Benchmark::Register(new BM_STLIndexSearch("URLS100k", 10*1000*1000)); + Benchmark::Register(new BM_CmphIndexSearch("URLS100k", 10*1000*1000)); Benchmark::RunAll(); return 0; } diff --git a/cxxmph/bm_map.cc b/cxxmph/bm_map.cc index 42516b6..8334604 100644 --- a/cxxmph/bm_map.cc +++ b/cxxmph/bm_map.cc @@ -4,29 +4,29 @@ #include "bm_common.h" #include "mph_map.h" - using cxxmph::mph_map; - using std::string; - using std::unordered_map; +using cxxmph::mph_map; +using std::string; +using std::unordered_map; - namespace cxxmph { +namespace cxxmph { - template - const T* myfind(const Container& mymap, const T& k) { - auto it = mymap.find(k); - if (it == mymap.end()) return NULL; - return &it->second; - } +template +const T* myfind(const MapType& mymap, const T& k) { + auto it = mymap.find(k); + if (it == mymap.end()) return NULL; + return &it->second; +} - template - class BM_CreateUrls : public UrlsBenchmark { - public: - BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { } - virtual void Run() { - MapType mymap; - for (auto it = urls_.begin(); it != urls_.end(); ++it) { - mymap[*it] = *it; - } +template +class BM_CreateUrls : public UrlsBenchmark { + public: + BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { } + virtual void Run() { + MapType mymap; + for (auto it = urls_.begin(); it != urls_.end(); ++it) { + mymap[*it] = *it; } + } }; template @@ -90,14 +90,12 @@ int main(int argc, char** argv) { /* Benchmark::Register(new BM_CreateUrls>("URLS100k")); Benchmark::Register(new BM_CreateUrls>("URLS100k")); - */ Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0)); Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0)); Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0.9)); Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0.9)); - /* +*/ Benchmark::Register(new BM_SearchUint64>); Benchmark::Register(new BM_SearchUint64>); - */ Benchmark::RunAll(); } diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h index 3ee9090..d2e4a01 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -138,8 +138,7 @@ bool MPHIndex::Reset(ForwardIterator begin, ForwardIterator end) { std::vector queue; while (1) { // cerr << "Iterations missing: " << iterations << endl; - for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_; - // for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i; + for (int i = 0; i < 3; ++i) hash_seed_[i] = random(); if (Mapping(begin, end, &edges, &queue)) break; else --iterations; if (iterations == 0) break; diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index e574c7c..7541c45 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -186,7 +186,6 @@ MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) { } MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const { - assert(slack_.empty()); if (index_.size() == 0) return -1; return index_.index(k); } diff --git a/cxxmph/seeded_hash.h b/cxxmph/seeded_hash.h index 64cb74d..0f9adfa 100644 --- a/cxxmph/seeded_hash.h +++ b/cxxmph/seeded_hash.h @@ -19,13 +19,6 @@ struct seeded_hash_function { } }; -struct seeded_identity_function { - template - uint32_t operator()(const Key& k, uint32_t seed) const { - return k ^ seed; - } -}; - struct Murmur2 { template uint32_t operator()(const Key& k) const { @@ -78,6 +71,7 @@ template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; +/* template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; template <> struct seeded_hash > @@ -90,6 +84,7 @@ template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; +*/ } // namespace cxxmph