diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h index c397b27..72ea3ef 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -67,12 +67,6 @@ class MPHIndex { uint32_t minimal_perfect_hash(const Key& x) const; // Crazy functions. Ignore. - template // must agree with Reset - uint32_t cuckoo_hash(const uint32_t* h, uint8_t nest) const; - template // must agree with Reset - uint8_t cuckoo_nest(const uint32_t* h) const; - template // must agree with Reset - uint32_t cuckoo_nest_index(const Key& x, uint32_t* h) const; template // must agree with Reset void hash_vector(const Key& x, uint32_t* h) const; @@ -190,29 +184,11 @@ bool MPHIndex::Mapping( return false; } -template -uint32_t MPHIndex::cuckoo_hash(const uint32_t* h, uint8_t nest) const { - return (h[nest] % r_) + nest_displacement_[nest]; -} - template void MPHIndex::hash_vector(const Key& key, uint32_t* h) const { SeededHashFcn().hash64(key, hash_seed_[0], h); } -template // must agree with Reset -uint8_t MPHIndex::cuckoo_nest(const uint32_t* h) const { - uint32_t x[4]; - if (!g_size_) return 0; - x[0] = (h[0] % r_) + nest_displacement_[0]; - x[1] = (h[1] % r_) + nest_displacement_[1]; - x[2] = (h[2] % r_) + nest_displacement_[2]; - assert((x[0] >> 2) > 2) > 2) uint32_t MPHIndex::perfect_hash(const Key& key) const { uint32_t h[4]; @@ -255,8 +231,6 @@ class SimpleMPHIndex : public MPHIndex { uint32_t index(const Key& key) const { return MPHIndex::index(key); } uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash(key); } uint32_t minimal_perfect_hash(const Key& key) const { return MPHIndex::minimal_perfect_hash(key); } - uint8_t cuckoo_nest(const uint32_t* h) const { return MPHIndex::cuckoo_nest(h); } - uint32_t cuckoo_hash(const uint32_t* h, uint8_t nest) const { return MPHIndex::cuckoo_hash(h, nest); } void hash_vector(const Key& key, uint32_t* h) const { MPHIndex::hash_vector(key, h); } }; diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index dd7bb08..9440fe8 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -105,34 +105,12 @@ class mph_map { // Experimental functions, not always faster iterator fast_find(const key_type& k); - const_iterator fast_find(const key_type& k) const; iterator slow_find(const key_type& k, uint32_t perfect_hash); const_iterator slow_find(const key_type& k, uint32_t perfect_hash) const; - static const uint8_t kNestCollision = 3; // biggest 2 bit value - void set_nest_value(const uint32_t* h, uint8_t value) { - auto index = get_nest_index(h); - assert(get_nest_index(h) < nests_.size()); - assert(get_nest_index(h) >> 2 < nests_.size()); - assert(value < 4); - nests_.set(index, value); - assert(nests_[index] == value); - } - uint32_t get_nest_value(const uint32_t* h) const { - assert(get_nest_index(h) < nests_.size()); - return nests_[get_nest_index(h)]; - } - uint32_t get_nest_index(const uint32_t* h) const { - assert(nests_.size()); - assert(nests_.size() % 2 == 0); - assert((nests_.size() & (nests_.size() - 1)) == 0); - assert((h[3] % nests_.size()) == (h[3] & (nests_.size() - 1))); - return (h[3] & (nests_.size() - 1)); // a mod 2^n == a & 2^n - 1 - } void pack(); std::vector values_; std::vector present_; - dynamic_2bitset nests_; SimpleMPHIndex::hash_function> index_; // TODO(davi) optimize slack to hold 128 unique bits from hash64 as key typedef unordered_map slack_type; @@ -169,9 +147,6 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) { } values_.push_back(x); present_.push_back(true); - uint32_t h[4]; - index_.hash_vector(x.first, h); - set_nest_value(h, kNestCollision); ++size_; slack_.insert(make_pair(x.first, values_.size() - 1)); if (should_pack) pack(); @@ -195,49 +170,16 @@ MPH_MAP_METHOD_DECL(void_type, pack)() { new_values.reserve(new_values.size() * 2); std::vector new_present(index_.perfect_hash_size(), false); new_present.reserve(new_present.size() * 2); - auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10000 + 1); - dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_); - vector used_nests(nests_.size()); - uint32_t collisions = 0; for (iterator it = begin(), it_end = end(); it != it_end; ++it) { size_type id = index_.perfect_hash(it->first); assert(id < new_values.size()); new_values[id] = *it; new_present[id] = true; - uint32_t h[4]; - index_.hash_vector(it->first, h); - // fprintf(stderr, "Nest index: %d\n", get_nest_index(h)); - assert(used_nests.size() > get_nest_index(h)); - if (used_nests[get_nest_index(h)]) { - set_nest_value(h, kNestCollision); - assert(get_nest_value(h) == kNestCollision); - // fprintf(stderr, "Collision at nest index %d among %d positions\n", get_nest_index(h), nests_.size()); - ++collisions; - } else { - set_nest_value(h, index_.cuckoo_nest(h)); - assert(get_nest_value(h) == index_.cuckoo_nest(h)); - assert(index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h))); - used_nests[get_nest_index(h)] = true; - } } // fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size()); values_.swap(new_values); present_.swap(new_present); slack_type().swap(slack_); - int32_t fast = 0; - int32_t slow= 0; - for (iterator it = begin(), it_end = end(); it != it_end; ++it) { - uint32_t h[4]; - index_.hash_vector(it->first, h); - if (get_nest_value(h) == kNestCollision) ++slow; - else { - ++fast; - auto cit = values_.begin() + index_.cuckoo_hash(h, get_nest_value(h)); - assert(index_.perfect_hash(it->first) == cit - values_.begin()); - assert(equal_(it->first, cit->first)); - } - } - // fprintf(stderr, "Predicted fast: %d slow %d\n", fast, slow); } MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); } @@ -252,7 +194,6 @@ MPH_MAP_METHOD_DECL(void_type, clear)() { present_.clear(); slack_.clear(); index_.clear(); - dynamic_2bitset(8, true /* fill with 1s */).swap(nests_); size_ = 0; } @@ -260,7 +201,6 @@ MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) { present_[pos - begin] = false; uint32_t h[4]; index_.hash_vector(pos->first, &h); - nests_[get_nest_index(h)] = kNestCollision; *pos = value_type(); --size_; } @@ -270,25 +210,6 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) { erase(it); } -MPH_MAP_METHOD_DECL(const_iterator, fast_find)(const key_type& k) const { - uint32_t h[4]; - index_.hash_vector(k, h); - auto nest = get_nest_value(h); - if (__builtin_expect(nest != kNestCollision, 1)) { - ++fast_taken_; - auto vit = values_.begin() + index_.cuckoo_hash(h, nest); - // do not hold for unknown keys - assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first)); - if (equal_(k, vit->first)) { - ++fast_; - return make_iterator(vit); - } - } - nest = index_.cuckoo_nest(h); - ++slow_; - return slow_find(k, index_.cuckoo_hash(h, nest)); -} - MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const { if (__builtin_expect(index_.perfect_hash_size(), 1)) { if (__builtin_expect(present_[perfect_hash], true)) { @@ -304,24 +225,6 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe return end(); } -MPH_MAP_METHOD_DECL(iterator, fast_find)(const key_type& k) { - uint32_t h[4]; - index_.hash_vector(k, h); - auto nest = get_nest_value(h); - if (__builtin_expect(nest != kNestCollision, 1)) { - ++fast_taken_; - auto vit = values_.begin() + index_.cuckoo_hash(h, nest); - assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first)); - if (equal_(k, vit->first)) { - ++fast_; - return make_iterator(vit); - } - } - nest = index_.cuckoo_nest(h); - ++slow_; - return slow_find(k, index_.cuckoo_hash(h, nest)); -} - MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_hash) { if (__builtin_expect(index_.perfect_hash_size(), 1)) { if (__builtin_expect(present_[perfect_hash], true)) {