Removed the failed cuckoo hash attempt. It was slower because of the extra memory access.
This commit is contained in:
parent
11d54ea837
commit
50ac0e2974
|
@ -67,12 +67,6 @@ class MPHIndex {
|
||||||
uint32_t minimal_perfect_hash(const Key& x) const;
|
uint32_t minimal_perfect_hash(const Key& x) const;
|
||||||
|
|
||||||
// Crazy functions. Ignore.
|
// Crazy functions. Ignore.
|
||||||
template <class SeededHashFcn> // must agree with Reset
|
|
||||||
uint32_t cuckoo_hash(const uint32_t* h, uint8_t nest) const;
|
|
||||||
template <class SeededHashFcn> // must agree with Reset
|
|
||||||
uint8_t cuckoo_nest(const uint32_t* h) const;
|
|
||||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
|
||||||
uint32_t cuckoo_nest_index(const Key& x, uint32_t* h) const;
|
|
||||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||||
void hash_vector(const Key& x, uint32_t* h) const;
|
void hash_vector(const Key& x, uint32_t* h) const;
|
||||||
|
|
||||||
|
@ -190,29 +184,11 @@ bool MPHIndex::Mapping(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class SeededHashFcn>
|
|
||||||
uint32_t MPHIndex::cuckoo_hash(const uint32_t* h, uint8_t nest) const {
|
|
||||||
return (h[nest] % r_) + nest_displacement_[nest];
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class SeededHashFcn, class Key>
|
template <class SeededHashFcn, class Key>
|
||||||
void MPHIndex::hash_vector(const Key& key, uint32_t* h) const {
|
void MPHIndex::hash_vector(const Key& key, uint32_t* h) const {
|
||||||
SeededHashFcn().hash64(key, hash_seed_[0], h);
|
SeededHashFcn().hash64(key, hash_seed_[0], h);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class SeededHashFcn> // must agree with Reset
|
|
||||||
uint8_t MPHIndex::cuckoo_nest(const uint32_t* h) const {
|
|
||||||
uint32_t x[4];
|
|
||||||
if (!g_size_) return 0;
|
|
||||||
x[0] = (h[0] % r_) + nest_displacement_[0];
|
|
||||||
x[1] = (h[1] % r_) + nest_displacement_[1];
|
|
||||||
x[2] = (h[2] % r_) + nest_displacement_[2];
|
|
||||||
assert((x[0] >> 2) <g_size_);
|
|
||||||
assert((x[1] >> 2) <g_size_);
|
|
||||||
assert((x[2] >> 2) <g_size_);
|
|
||||||
return (get_2bit_value(g_, x[0]) + get_2bit_value(g_, x[1]) + get_2bit_value(g_, x[2])) % 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class SeededHashFcn, class Key>
|
template <class SeededHashFcn, class Key>
|
||||||
uint32_t MPHIndex::perfect_hash(const Key& key) const {
|
uint32_t MPHIndex::perfect_hash(const Key& key) const {
|
||||||
uint32_t h[4];
|
uint32_t h[4];
|
||||||
|
@ -255,8 +231,6 @@ class SimpleMPHIndex : public MPHIndex {
|
||||||
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
|
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
|
||||||
uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash<HashFcn>(key); }
|
uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash<HashFcn>(key); }
|
||||||
uint32_t minimal_perfect_hash(const Key& key) const { return MPHIndex::minimal_perfect_hash<HashFcn>(key); }
|
uint32_t minimal_perfect_hash(const Key& key) const { return MPHIndex::minimal_perfect_hash<HashFcn>(key); }
|
||||||
uint8_t cuckoo_nest(const uint32_t* h) const { return MPHIndex::cuckoo_nest<HashFcn>(h); }
|
|
||||||
uint32_t cuckoo_hash(const uint32_t* h, uint8_t nest) const { return MPHIndex::cuckoo_hash<HashFcn>(h, nest); }
|
|
||||||
void hash_vector(const Key& key, uint32_t* h) const { MPHIndex::hash_vector<HashFcn>(key, h); }
|
void hash_vector(const Key& key, uint32_t* h) const { MPHIndex::hash_vector<HashFcn>(key, h); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -105,34 +105,12 @@ class mph_map {
|
||||||
|
|
||||||
// Experimental functions, not always faster
|
// Experimental functions, not always faster
|
||||||
iterator fast_find(const key_type& k);
|
iterator fast_find(const key_type& k);
|
||||||
const_iterator fast_find(const key_type& k) const;
|
|
||||||
iterator slow_find(const key_type& k, uint32_t perfect_hash);
|
iterator slow_find(const key_type& k, uint32_t perfect_hash);
|
||||||
const_iterator slow_find(const key_type& k, uint32_t perfect_hash) const;
|
const_iterator slow_find(const key_type& k, uint32_t perfect_hash) const;
|
||||||
static const uint8_t kNestCollision = 3; // biggest 2 bit value
|
|
||||||
void set_nest_value(const uint32_t* h, uint8_t value) {
|
|
||||||
auto index = get_nest_index(h);
|
|
||||||
assert(get_nest_index(h) < nests_.size());
|
|
||||||
assert(get_nest_index(h) >> 2 < nests_.size());
|
|
||||||
assert(value < 4);
|
|
||||||
nests_.set(index, value);
|
|
||||||
assert(nests_[index] == value);
|
|
||||||
}
|
|
||||||
uint32_t get_nest_value(const uint32_t* h) const {
|
|
||||||
assert(get_nest_index(h) < nests_.size());
|
|
||||||
return nests_[get_nest_index(h)];
|
|
||||||
}
|
|
||||||
uint32_t get_nest_index(const uint32_t* h) const {
|
|
||||||
assert(nests_.size());
|
|
||||||
assert(nests_.size() % 2 == 0);
|
|
||||||
assert((nests_.size() & (nests_.size() - 1)) == 0);
|
|
||||||
assert((h[3] % nests_.size()) == (h[3] & (nests_.size() - 1)));
|
|
||||||
return (h[3] & (nests_.size() - 1)); // a mod 2^n == a & 2^n - 1
|
|
||||||
}
|
|
||||||
|
|
||||||
void pack();
|
void pack();
|
||||||
std::vector<value_type> values_;
|
std::vector<value_type> values_;
|
||||||
std::vector<bool> present_;
|
std::vector<bool> present_;
|
||||||
dynamic_2bitset nests_;
|
|
||||||
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
|
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
|
||||||
// TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
|
// TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
|
||||||
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
|
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
|
||||||
|
@ -169,9 +147,6 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
||||||
}
|
}
|
||||||
values_.push_back(x);
|
values_.push_back(x);
|
||||||
present_.push_back(true);
|
present_.push_back(true);
|
||||||
uint32_t h[4];
|
|
||||||
index_.hash_vector(x.first, h);
|
|
||||||
set_nest_value(h, kNestCollision);
|
|
||||||
++size_;
|
++size_;
|
||||||
slack_.insert(make_pair(x.first, values_.size() - 1));
|
slack_.insert(make_pair(x.first, values_.size() - 1));
|
||||||
if (should_pack) pack();
|
if (should_pack) pack();
|
||||||
|
@ -195,49 +170,16 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
|
||||||
new_values.reserve(new_values.size() * 2);
|
new_values.reserve(new_values.size() * 2);
|
||||||
std::vector<bool> new_present(index_.perfect_hash_size(), false);
|
std::vector<bool> new_present(index_.perfect_hash_size(), false);
|
||||||
new_present.reserve(new_present.size() * 2);
|
new_present.reserve(new_present.size() * 2);
|
||||||
auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10000 + 1);
|
|
||||||
dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_);
|
|
||||||
vector<bool> used_nests(nests_.size());
|
|
||||||
uint32_t collisions = 0;
|
|
||||||
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
||||||
size_type id = index_.perfect_hash(it->first);
|
size_type id = index_.perfect_hash(it->first);
|
||||||
assert(id < new_values.size());
|
assert(id < new_values.size());
|
||||||
new_values[id] = *it;
|
new_values[id] = *it;
|
||||||
new_present[id] = true;
|
new_present[id] = true;
|
||||||
uint32_t h[4];
|
|
||||||
index_.hash_vector(it->first, h);
|
|
||||||
// fprintf(stderr, "Nest index: %d\n", get_nest_index(h));
|
|
||||||
assert(used_nests.size() > get_nest_index(h));
|
|
||||||
if (used_nests[get_nest_index(h)]) {
|
|
||||||
set_nest_value(h, kNestCollision);
|
|
||||||
assert(get_nest_value(h) == kNestCollision);
|
|
||||||
// fprintf(stderr, "Collision at nest index %d among %d positions\n", get_nest_index(h), nests_.size());
|
|
||||||
++collisions;
|
|
||||||
} else {
|
|
||||||
set_nest_value(h, index_.cuckoo_nest(h));
|
|
||||||
assert(get_nest_value(h) == index_.cuckoo_nest(h));
|
|
||||||
assert(index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h)));
|
|
||||||
used_nests[get_nest_index(h)] = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
|
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
|
||||||
values_.swap(new_values);
|
values_.swap(new_values);
|
||||||
present_.swap(new_present);
|
present_.swap(new_present);
|
||||||
slack_type().swap(slack_);
|
slack_type().swap(slack_);
|
||||||
int32_t fast = 0;
|
|
||||||
int32_t slow= 0;
|
|
||||||
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
|
||||||
uint32_t h[4];
|
|
||||||
index_.hash_vector(it->first, h);
|
|
||||||
if (get_nest_value(h) == kNestCollision) ++slow;
|
|
||||||
else {
|
|
||||||
++fast;
|
|
||||||
auto cit = values_.begin() + index_.cuckoo_hash(h, get_nest_value(h));
|
|
||||||
assert(index_.perfect_hash(it->first) == cit - values_.begin());
|
|
||||||
assert(equal_(it->first, cit->first));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// fprintf(stderr, "Predicted fast: %d slow %d\n", fast, slow);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
|
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
|
||||||
|
@ -252,7 +194,6 @@ MPH_MAP_METHOD_DECL(void_type, clear)() {
|
||||||
present_.clear();
|
present_.clear();
|
||||||
slack_.clear();
|
slack_.clear();
|
||||||
index_.clear();
|
index_.clear();
|
||||||
dynamic_2bitset(8, true /* fill with 1s */).swap(nests_);
|
|
||||||
size_ = 0;
|
size_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,7 +201,6 @@ MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
|
||||||
present_[pos - begin] = false;
|
present_[pos - begin] = false;
|
||||||
uint32_t h[4];
|
uint32_t h[4];
|
||||||
index_.hash_vector(pos->first, &h);
|
index_.hash_vector(pos->first, &h);
|
||||||
nests_[get_nest_index(h)] = kNestCollision;
|
|
||||||
*pos = value_type();
|
*pos = value_type();
|
||||||
--size_;
|
--size_;
|
||||||
}
|
}
|
||||||
|
@ -270,25 +210,6 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
|
||||||
erase(it);
|
erase(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(const_iterator, fast_find)(const key_type& k) const {
|
|
||||||
uint32_t h[4];
|
|
||||||
index_.hash_vector(k, h);
|
|
||||||
auto nest = get_nest_value(h);
|
|
||||||
if (__builtin_expect(nest != kNestCollision, 1)) {
|
|
||||||
++fast_taken_;
|
|
||||||
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
|
||||||
// do not hold for unknown keys
|
|
||||||
assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first));
|
|
||||||
if (equal_(k, vit->first)) {
|
|
||||||
++fast_;
|
|
||||||
return make_iterator(vit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nest = index_.cuckoo_nest(h);
|
|
||||||
++slow_;
|
|
||||||
return slow_find(k, index_.cuckoo_hash(h, nest));
|
|
||||||
}
|
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const {
|
MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const {
|
||||||
if (__builtin_expect(index_.perfect_hash_size(), 1)) {
|
if (__builtin_expect(index_.perfect_hash_size(), 1)) {
|
||||||
if (__builtin_expect(present_[perfect_hash], true)) {
|
if (__builtin_expect(present_[perfect_hash], true)) {
|
||||||
|
@ -304,24 +225,6 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe
|
||||||
return end();
|
return end();
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(iterator, fast_find)(const key_type& k) {
|
|
||||||
uint32_t h[4];
|
|
||||||
index_.hash_vector(k, h);
|
|
||||||
auto nest = get_nest_value(h);
|
|
||||||
if (__builtin_expect(nest != kNestCollision, 1)) {
|
|
||||||
++fast_taken_;
|
|
||||||
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
|
||||||
assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first));
|
|
||||||
if (equal_(k, vit->first)) {
|
|
||||||
++fast_;
|
|
||||||
return make_iterator(vit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nest = index_.cuckoo_nest(h);
|
|
||||||
++slow_;
|
|
||||||
return slow_find(k, index_.cuckoo_hash(h, nest));
|
|
||||||
}
|
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_hash) {
|
MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_hash) {
|
||||||
if (__builtin_expect(index_.perfect_hash_size(), 1)) {
|
if (__builtin_expect(index_.perfect_hash_size(), 1)) {
|
||||||
if (__builtin_expect(present_[perfect_hash], true)) {
|
if (__builtin_expect(present_[perfect_hash], true)) {
|
||||||
|
|
Loading…
Reference in New Issue