Perfect hash working, but it is slower.

This commit is contained in:
Davi Reis 2012-03-12 00:17:08 -03:00
parent 238e384367
commit 09c1af7771
4 changed files with 42 additions and 22 deletions

View File

@ -17,7 +17,7 @@ struct hollow_iterator_base
typedef typename iterator::pointer pointer; typedef typename iterator::pointer pointer;
hollow_iterator_base(container* c, presence* p, iterator it) hollow_iterator_base(container* c, presence* p, iterator it)
: c_(c), p_(p), it_(it) { find_present(); } : c_(c), p_(p), it_(it) { if (c_) find_present(); }
self_reference operator++() { self_reference operator++() {
++it_; find_present(); ++it_; find_present();
} }
@ -44,6 +44,7 @@ struct hollow_iterator : public hollow_iterator_base<
container_type, std::vector<bool>, typename container_type::iterator> { container_type, std::vector<bool>, typename container_type::iterator> {
typedef hollow_iterator_base< typedef hollow_iterator_base<
container_type, std::vector<bool>, typename container_type::iterator> parent_class; container_type, std::vector<bool>, typename container_type::iterator> parent_class;
// Default constructor: forwards a null container pointer, a null presence
// pointer and a value-initialized underlying iterator to the base class, so
// the resulting iterator is not bound to any container until assigned to.
hollow_iterator() : parent_class(NULL, NULL, typename container_type::iterator()) { }
hollow_iterator(typename parent_class::container* c, hollow_iterator(typename parent_class::container* c,
typename parent_class::presence* p, typename parent_class::presence* p,
typename parent_class::iterator it) typename parent_class::iterator it)
@ -58,6 +59,7 @@ struct hollow_const_iterator : public hollow_iterator_base<
typedef hollow_const_iterator<container_type> self_type; typedef hollow_const_iterator<container_type> self_type;
typedef hollow_iterator<container_type> non_const_type; typedef hollow_iterator<container_type> non_const_type;
hollow_const_iterator(non_const_type rhs) : parent_class(rhs.c_, rhs.p_, typename container_type::const_iterator(rhs.it_)) { } hollow_const_iterator(non_const_type rhs) : parent_class(rhs.c_, rhs.p_, typename container_type::const_iterator(rhs.it_)) { }
hollow_const_iterator() : parent_class(NULL, NULL, typename container_type::iterator()) { }
hollow_const_iterator(const typename parent_class::container* c, hollow_const_iterator(const typename parent_class::container* c,
const typename parent_class::presence* p, const typename parent_class::presence* p,
typename parent_class::iterator it) typename parent_class::iterator it)

View File

@ -31,5 +31,8 @@ int main(int argc, char** argv) {
auto it1 = hollow_iterator<vector<int>>(&v, &p, v.begin()); auto it1 = hollow_iterator<vector<int>>(&v, &p, v.begin());
auto it2 = hollow_const_iterator<vector<int>>(&v, &p, v.begin()); auto it2 = hollow_const_iterator<vector<int>>(&v, &p, v.begin());
if (it1 != it2) exit(-1); if (it1 != it2) exit(-1);
hollow_iterator<vector<int>> default_constructed;
default_constructed = hollow_iterator<vector<int>>(&v, &p, v.begin());
} }

View File

@ -7,6 +7,7 @@
// and should not be used if performance is a concern. In fact, you should only // and should not be used if performance is a concern. In fact, you should only
// use it for educational purposes. // use it for educational purposes.
#include <iostream>
#include <algorithm> #include <algorithm>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
@ -71,9 +72,8 @@ class mph_map {
data_type& operator[](const key_type &k); data_type& operator[](const key_type &k);
const data_type& operator[](const key_type &k) const; const data_type& operator[](const key_type &k) const;
size_type bucket_count() const { return index_.perfect_hash_size() + slack_.bucket_count(); } size_type bucket_count() const { return index_.size() + slack_.bucket_count(); }
// FIXME: not sure if this has the semantics I want void rehash(size_type nbuckets /*ignored*/);
void rehash(size_type nbuckets /*ignored*/) { pack(); }
protected: // mimicking STL implementation protected: // mimicking STL implementation
EqualKey equal_; EqualKey equal_;
@ -131,6 +131,7 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
} }
values_.push_back(x); values_.push_back(x);
present_.push_back(true); present_.push_back(true);
++size_;
slack_.insert(make_pair(x.first, values_.size() - 1)); slack_.insert(make_pair(x.first, values_.size() - 1));
if (should_pack) pack(); if (should_pack) pack();
it = find(x.first); it = find(x.first);
@ -143,10 +144,12 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
make_iterator_first(begin()), make_iterator_first(begin()),
make_iterator_first(end()), size_); make_iterator_first(end()), size_);
assert(success); assert(success);
std::vector<value_type> new_values(index_.size()); std::vector<value_type> new_values(index_.perfect_hash_size());
std::vector<bool> new_present(index_.size(), false); new_values.reserve(new_values.size() * 2);
for (iterator it(begin()), it_end(end()); it != it_end; ++it) { std::vector<bool> new_present(index_.perfect_hash_size(), false);
size_type id = index_.index(it->first); new_present.reserve(new_present.size() * 2);
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
size_type id = index_.perfect_hash(it->first);
assert(id < new_values.size()); assert(id < new_values.size());
new_values[id] = *it; new_values[id] = *it;
new_present[id] = true; new_present[id] = true;
@ -168,11 +171,13 @@ MPH_MAP_METHOD_DECL(void_type, clear)() {
present_.clear(); present_.clear();
slack_.clear(); slack_.clear();
index_.clear(); index_.clear();
size_ = 0;
} }
MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) { MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
present_[pos - begin] = false; present_[pos - begin] = false;
*pos = value_type(); *pos = value_type();
--size_;
} }
MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) { MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
iterator it = find(k); iterator it = find(k);
@ -214,6 +219,13 @@ MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) { MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
return insert(make_pair(k, data_type())).first->second; return insert(make_pair(k, data_type())).first->second;
} }
// Repacks the map and then replaces each backing container with a freshly
// built one.
//
// nbuckets is accepted for interface compatibility but is never read here.
//
// Steps, in order:
//   1. pack() is run.
//   2. values_ and present_ are each swapped with a copy constructed from
//      their own element range (presumably to shed spare capacity; confirm
//      against pack()).
//   3. slack_ is swapped with a default-constructed slack_type, which leaves
//      it empty.
MPH_MAP_METHOD_DECL(void_type, rehash)(size_type nbuckets) {
  pack();
  {
    // Scoped so the old storage is released before the next container is copied.
    vector<value_type> fresh_values(values_.begin(), values_.end());
    values_.swap(fresh_values);
  }
  {
    vector<bool> fresh_present(present_.begin(), present_.end());
    present_.swap(fresh_present);
  }
  {
    slack_type empty_slack;
    slack_.swap(empty_slack);
  }
}
} // namespace cxxmph } // namespace cxxmph

View File

@ -11,21 +11,25 @@ using cxxmph::mph_map;
int main(int argc, char** argv) { int main(int argc, char** argv) {
mph_map<int64_t, int64_t> b; mph_map<int64_t, int64_t> b;
for (int i = 0; i < 100*1000; ++i) { int32_t num_keys = 1000*10;
for (int i = 0; i < num_keys; ++i) {
b.insert(make_pair(i, i)); b.insert(make_pair(i, i));
} }
for (int i = 0; i < 1000*1000; ++i) { for (int i = 0; i < num_keys; ++i) {
b.find(i); auto it = b.find(i);
if (it->first != it->second || it->first != i) {
std::cerr << "Found " << it->first << " looking for " << i << std::endl;
exit(-1);
}
} }
/*
mph_map<string, int> h; mph_map<string, int> h;
h.insert(std::make_pair("-1",-1)); h.insert(std::make_pair("-1",-1));
mph_map<string, int>::const_iterator it; mph_map<string, int>::const_iterator it;
for (it = h.begin(); it != h.end(); ++it) { for (it = h.begin(); it != h.end(); ++it) {
std::cerr << it->first << " -> " << it->second << std::endl; if (it->second != -1) exit(-1);
} }
std::cerr << "Search -1 gives " << h.find("-1")->second << std::endl; int32_t num_valid = 100;
for (int i = 0; i < 100; ++i) { for (int i = 0; i < num_valid; ++i) {
char buf[10]; char buf[10];
snprintf(buf, 10, "%d", i); snprintf(buf, 10, "%d", i);
h.insert(std::make_pair(buf, i)); h.insert(std::make_pair(buf, i));
@ -34,18 +38,17 @@ int main(int argc, char** argv) {
for (int i = 1000; i > 0; --i) { for (int i = 1000; i > 0; --i) {
char buf[10]; char buf[10];
snprintf(buf, 10, "%d", i - 1); snprintf(buf, 10, "%d", i - 1);
h.find(buf); auto it = h.find(buf);
std::cerr << "Search " << i - 1 << " gives " << h.find(buf)->second << std::endl; if (i < num_valid && it->second != i - 1) exit(-1);
} }
} }
for (int j = 0; j < 100; ++j) { for (int j = 0; j < 100; ++j) {
for (int i = 1000; i > 0; --i) { for (int i = 1000; i > 0; --i) {
char buf[10]; char buf[10];
snprintf(buf, 10, "%d", i*100 - 1); int key = i*100 - 1;
h.find(buf); snprintf(buf, 10, "%d", key);
std::cerr << "Search " << i*100 - 1 << " gives " << h.find(buf)->second << std::endl; auto it = h.find(buf);
if (key < num_valid && it->second != key) exit(-1);
} }
} }
*/
} }