From 7fe9527459792f910e4434b4cf62f3f11bbf0b4e Mon Sep 17 00:00:00 2001 From: Davi Reis Date: Wed, 14 Mar 2012 21:22:40 -0300 Subject: [PATCH] Interesting point, but get_cuckoo_nest is adding a lot and fast path is not that fast for int64. --- cxxmph/bm_map.cc | 4 ++-- cxxmph/mph_map.h | 45 ++++++++++++++++++++++++++++++++---------- cxxmph/mph_map_test.cc | 10 +++++++--- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/cxxmph/bm_map.cc b/cxxmph/bm_map.cc index 0a0b225..51d2ad0 100644 --- a/cxxmph/bm_map.cc +++ b/cxxmph/bm_map.cc @@ -93,8 +93,8 @@ int main(int argc, char** argv) { Benchmark::Register(new BM_CreateUrls>("URLS100k")); Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0)); Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0)); - Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0.9)); - Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0.9)); + // Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0.9)); + // Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000, 0.9)); Benchmark::Register(new BM_SearchUint64>); Benchmark::Register(new BM_SearchUint64>); Benchmark::RunAll(); diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index a291986..471dafd 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -120,8 +120,10 @@ class mph_map { } uint32_t get_nest_index(const uint32_t* h) const { assert(nests_.size()); - return h[3] % nests_.size(); // a mod 2^n == a & 2^n - 1 - // return h[3] & (nests_.size() - 1); // a mod 2^n == a & 2^n - 1 + assert(nests_.size() % 2 == 0); + assert((nests_.size() & (nests_.size() - 1)) == 0); + assert((h[3] % nests_.size()) == (h[3] & (nests_.size() - 1))); + return (h[3] & (nests_.size() - 1)); // a mod 2^n == a & 2^n - 1 } void pack(); @@ -135,7 +137,9 @@ class mph_map { size_type size_; mutable uint64_t fast_; + mutable uint64_t fast_taken_; mutable uint64_t slow_; + mutable uint64_t very_slow_; }; MPH_MAP_TMPL_SPEC @@ -149,7 +153,7 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) { } MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() { - fprintf(stderr, "Fast: %d Slow %d ratio %f\n", fast_, slow_, fast_*1.0/slow_); + fprintf(stderr, "Fast taken: %d Fast: %d Slow %d very_slow %d ratio %f\n", fast_taken_, fast_, slow_, very_slow_, fast_*1.0/slow_); } MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) { @@ -169,10 +173,15 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) { slack_.insert(make_pair(x.first, values_.size() - 1)); if (should_pack) pack(); it = find(x.first); + slow_ = 0; + very_slow_ = 0; + fast_ = 0; + fast_taken_ = 0; return make_pair(it, true); } MPH_MAP_METHOD_DECL(void_type, pack)() { + // fprintf(stderr, "Paki %d values\n", values_.size()); if (values_.empty()) return; assert(std::unordered_set(make_iterator_first(begin()), make_iterator_first(end())).size() == size()); bool success = index_.Reset( @@ -183,7 +192,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() { new_values.reserve(new_values.size() * 2); std::vector new_present(index_.perfect_hash_size(), false); new_present.reserve(new_present.size() * 2); - auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10 + 1); + auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*100 + 1); dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_); vector used_nests(nests_.size()); uint32_t collisions = 0; @@ -208,15 +217,24 @@ MPH_MAP_METHOD_DECL(void_type, pack)() { used_nests[get_nest_index(h)] = true; } } - for (iterator it = begin(), it_end = end(); it != it_end; ++it) { - uint32_t h[4]; - index_.hash_vector(it->first, h); - assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h))); - } // fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size()); values_.swap(new_values); present_.swap(new_present); slack_type().swap(slack_); + int32_t fast = 0; + int32_t slow= 0; + for (iterator it = begin(), it_end = end(); it != it_end; ++it) { + uint32_t h[4]; + index_.hash_vector(it->first, h); + if (get_nest_value(h) == kNestCollision) ++slow; + else { + ++fast; + auto cit = values_.begin() + index_.cuckoo_hash(h, get_nest_value(h)); + assert(index_.perfect_hash(it->first) == cit - values_.begin()); + assert(equal_(it->first, cit->first)); + } + } + // fprintf(stderr, "Predicted fast: %d slow %d\n", fast, slow); } MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); } @@ -231,7 +249,7 @@ MPH_MAP_METHOD_DECL(void_type, clear)() { present_.clear(); slack_.clear(); index_.clear(); - dynamic_2bitset(1, true /* fill with 1s */).swap(nests_); + dynamic_2bitset(8, true /* fill with 1s */).swap(nests_); size_ = 0; } @@ -254,7 +272,10 @@ MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const { index_.hash_vector(k, h); auto nest = get_nest_value(h); if (__builtin_expect(nest != kNestCollision, 1)) { + ++fast_taken_; auto vit = values_.begin() + index_.cuckoo_hash(h, nest); + // do not hold for unknown keys + assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first)); if (equal_(k, vit->first)) { ++fast_; return make_iterator(vit); @@ -273,6 +294,7 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe } } if (__builtin_expect(!slack_.empty(), 0)) { + ++very_slow_; auto sit = slack_.find(k); if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second); } @@ -284,7 +306,9 @@ MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) { index_.hash_vector(k, h); auto nest = get_nest_value(h); if (__builtin_expect(nest != kNestCollision, 1)) { + ++fast_taken_; auto vit = values_.begin() + index_.cuckoo_hash(h, nest); + assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first)); if (equal_(k, vit->first)) { ++fast_; return make_iterator(vit); @@ -303,6 +327,7 @@ MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_has } } if (__builtin_expect(!slack_.empty(), 0)) { + ++very_slow_; auto sit = slack_.find(k); if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second); } diff --git a/cxxmph/mph_map_test.cc b/cxxmph/mph_map_test.cc index ada71b3..1d489c6 100644 --- a/cxxmph/mph_map_test.cc +++ b/cxxmph/mph_map_test.cc @@ -15,17 +15,20 @@ int main(int argc, char** argv) { for (int i = 0; i < num_keys; ++i) { b.insert(make_pair(i, i)); } - for (int i = 0; i < num_keys; ++i) { - auto it = b.find(i); + b.rehash(b.size()); + fprintf(stderr, "Insertion finished\n"); + for (int i = 0; i < 1000000; ++i) { + auto it = b.find(i % num_keys); if (it == b.end()) { std::cerr << "Failed to find " << i << std::endl; exit(-1); } - if (it->first != it->second || it->first != i) { + if (it->first != it->second || it->first != i % num_keys) { std::cerr << "Found " << it->first << " looking for " << i << std::endl; exit(-1); } } + /* mph_map h; h.insert(std::make_pair("-1",-1)); mph_map::const_iterator it; @@ -55,4 +58,5 @@ int main(int argc, char** argv) { if (key < num_valid && it->second != key) exit(-1); } } + */ }