Iterator game.

This commit is contained in:
Davi Reis 2012-03-07 03:10:29 -05:00
parent 20aeaf8ee1
commit c057fb882b
2 changed files with 42 additions and 21 deletions

View File

@@ -87,14 +87,12 @@ using namespace cxxmph;
int main(int argc, char** argv) { int main(int argc, char** argv) {
srandom(4); srandom(4);
/*
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k")); Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k")); Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0)); Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 10*1000 * 1000, 0)); Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9)); Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 10*1000 * 1000, 0.9)); Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
*/
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>); Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>); Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
Benchmark::RunAll(); Benchmark::RunAll();

View File

@@ -41,8 +41,18 @@ class mph_map {
typedef typename std::vector<value_type>::const_reference const_reference; typedef typename std::vector<value_type>::const_reference const_reference;
typedef typename std::vector<value_type>::size_type size_type; typedef typename std::vector<value_type>::size_type size_type;
typedef typename std::vector<value_type>::difference_type difference_type; typedef typename std::vector<value_type>::difference_type difference_type;
typedef typename std::vector<value_type>::iterator iterator;
typedef typename std::vector<value_type>::const_iterator const_iterator; template <class T, typename iterator>
struct indirect_iterator : public typename slack_type::iterator {
indirect_iterator(T* v, iterator it) : iterator(it), v_(v) { }
const typename iterator::value_type::first_type& operator*() const {
return v->begin() + (this->iterator::operator*())->second;
}
};
typedef indirect_iterator<std::vector<value_type>, slack_type>::iterator iterator;
typedef indirect_iterator<std::vector<value_type>, slack_type>::const_iterator const_iterator;
// For making macros simpler. // For making macros simpler.
typedef void void_type; typedef void void_type;
@@ -69,7 +79,7 @@ class mph_map {
data_type& operator[](const key_type &k); data_type& operator[](const key_type &k);
const data_type& operator[](const key_type &k) const; const data_type& operator[](const key_type &k) const;
size_type bucket_count() const { return size(); } size_type bucket_count() const { return index_.perfect_hash_size() + slack_.bucket_count(); }
// FIXME: not sure if this has the semantics I want // FIXME: not sure if this has the semantics I want
void rehash(size_type nbuckets /*ignored*/) { pack(); } void rehash(size_type nbuckets /*ignored*/) { pack(); }
@@ -90,6 +100,11 @@ class mph_map {
return iterator_first<iterator>(it); return iterator_first<iterator>(it);
} }
template <class T, typename iterator>
indirect_iterator<iterator> make_indirect_iterator(T* v, iterator it) {
return indirect_iterator<iterator>(v, it);
}
void pack(); void pack();
std::vector<value_type> values_; std::vector<value_type> values_;
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_; SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
@@ -113,31 +128,39 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) { MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
iterator it = find(x.first); iterator it = find(x.first);
if (it != end()) return make_pair(it, false); if (it != end()) return make_pair(it, false);
should_pack = false;
if (values_.capacity() == values_.size() && values_.size() > 256) {
should_pack = true;
}
values_.push_back(x); values_.push_back(x);
slack_.insert(make_pair(x.first, values_.size() - 1)); slack_.insert(make_pair(x.first, values_.size() - 1));
if (slack_.size() == index_.size() || if (should_pack) pack();
(slack_.size() >= 256 && index_.size() == 0)) {
pack();
}
it = find(x.first); it = find(x.first);
return make_pair(it, true); return make_pair(it, true);
} }
MPH_MAP_METHOD_DECL(void_type, pack)() { MPH_MAP_METHOD_DECL(void_type, pack)() {
if (values_.empty()) return; if (values_.empty()) return;
slack_type().swap(slack_);
bool success = index_.Reset( bool success = index_.Reset(
make_iterator_first(values_.begin()), make_iterator_first(slack_.begin())),
make_iterator_first(values_.end())); make_iterator_first(slack_.end())));
assert(success); assert(success);
std::vector<value_type> new_values(values_.size()); std::vector<value_type> new_values(index_.size());
for (const_iterator it = values_.begin(), end = values_.end(); for (const_iterator it = values_.begin(), end = values_.end();
it != end; ++it) { it != end; ++it) {
size_type id = index_.index(it->first); size_type id = index_.index((*it)->first);
assert(id < new_values.size()); assert(id < new_values.size());
new_values[id] = *it; new_values[id] = *it;
} }
values_.swap(new_values); values_.swap(new_values);
std::vector<size_type> new_values_pointer(
index_.perfect_hash_size());;
for (size_type i = 0; i < values_.size(); ++i) {
size_type id = index_.perfect_hash(values_[i].first);
assert(id < new_values_pointer.size());
new_values_pointer[id] = i;
}
values_pointer_.swap(new_values_pointer);
} }
MPH_MAP_METHOD_DECL(iterator, begin)() { return values_.begin(); } MPH_MAP_METHOD_DECL(iterator, begin)() { return values_.begin(); }
@@ -169,25 +192,25 @@ MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
if (it != slack_.end()) return values_.begin() + it->second; if (it != slack_.end()) return values_.begin() + it->second;
} }
if (__builtin_expect(index_.size() == 0, 0)) return end(); if (__builtin_expect(index_.size() == 0, 0)) return end();
const_iterator it = values_.begin() + index_.index(k); const_iterator it = values_.begin() + values_pointer_[index_.perfect_hash(k)];
if (__builtin_expect(equal_(k, it->first), 1)) return it; if (__builtin_expect(equal_(k, it->first), 1)) return it;
return end(); return end();
} }
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) { MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
if (!slack_.empty()) { if (__builtin_expect(!slack_.empty(), 0)) {
typename slack_type::const_iterator it = slack_.find(k); typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second; if (it != slack_.end()) return values_.begin() + it->second;
} }
if (index_.size() == 0) return end(); if (__builtin_expect(index_.size() == 0, 0)) return end();
iterator it = values_.begin() + index_.index(k); iterator it = values_.begin() + values_pointer_[index_.perfect_hash(k)];
if (equal_(it->first, k)) return it; if (__builtin_expect(equal_(k, it->first), 1)) return it;
return end(); return end();
} }
MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const { MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
if (index_.size() == 0) return -1; if (index_.size() == 0) return -1;
return index_.index(k); return index_.perfect_hash(k);
} }
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) { MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {