All tests pass.

This commit is contained in:
Davi de Castro Reis 2011-06-14 02:24:40 -03:00
parent c749ab444b
commit 0846177267
7 changed files with 106 additions and 41 deletions

View File

@ -56,7 +56,7 @@ class SearchUint64Benchmark : public Uint64Benchmark {
protected: protected:
virtual bool SetUp(); virtual bool SetUp();
const uint32_t nsearches_; const uint32_t nsearches_;
std::vector<uint32_t> random_; std::vector<uint64_t> random_;
}; };
} // namespace cxxmph } // namespace cxxmph

View File

@ -1,15 +1,15 @@
#include <set> #include <set>
#include <string> #include <string>
#include <tr1/unordered_set> #include <tr1/unordered_map>
#include "bm_common.h" #include "bm_common.h"
#include "StringPiece.h" #include "stringpiece.h"
#include "mph_index.h" #include "mph_index.h"
using namespace cxxmph; using namespace cxxmph;
using std::string; using std::string;
using std::tr1::unordered_set; using std::tr1::unordered_map;
class BM_MPHIndexCreate : public UrlsBenchmark { class BM_MPHIndexCreate : public UrlsBenchmark {
public: public:
@ -28,8 +28,11 @@ class BM_STLIndexCreate : public UrlsBenchmark {
: UrlsBenchmark(urls_file) { } : UrlsBenchmark(urls_file) { }
protected: protected:
virtual void Run() { virtual void Run() {
unordered_set<StringPiece> index; unordered_map<StringPiece, uint32_t> index;
index.insert(urls_.begin(), urls_.end()); int idx = 0;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
index.insert(make_pair(*it, idx++));
}
} }
}; };
@ -38,10 +41,10 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
BM_MPHIndexSearch(const std::string& urls_file, int nsearches) BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches) { } : SearchUrlsBenchmark(urls_file, nsearches) { }
virtual void Run() { virtual void Run() {
while (true) {
for (auto it = random_.begin(); it != random_.end(); ++it) { for (auto it = random_.begin(); it != random_.end(); ++it) {
index_.index(*it); auto idx = index_.index(*it);
} // Collision check to be fair with STL
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
} }
} }
protected: protected:
@ -59,23 +62,28 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark {
: SearchUrlsBenchmark(urls_file, nsearches) { } : SearchUrlsBenchmark(urls_file, nsearches) { }
virtual void Run() { virtual void Run() {
for (auto it = random_.begin(); it != random_.end(); ++it) { for (auto it = random_.begin(); it != random_.end(); ++it) {
index_.find(*it); // - index_.begin(); auto idx = index_.find(*it);
} }
} }
protected: protected:
virtual bool SetUp () { virtual bool SetUp () {
if (!SearchUrlsBenchmark::SetUp()) return false; if (!SearchUrlsBenchmark::SetUp()) return false;
std::tr1::unordered_set<StringPiece>(urls_.begin(), urls_.end()).swap(index_); unordered_map<StringPiece, uint32_t> index;
int idx = 0;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
index.insert(make_pair(*it, idx++));
}
index.swap(index_);
return true; return true;
} }
std::tr1::unordered_set<StringPiece> index_; std::tr1::unordered_map<StringPiece, uint32_t> index_;
}; };
int main(int argc, char** argv) { int main(int argc, char** argv) {
Benchmark::Register(new BM_MPHIndexCreate("URLS100k")); Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
Benchmark::Register(new BM_STLIndexCreate("URLS100k")); Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 1000*1000)); Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000));
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 1000*1000)); Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000));
Benchmark::RunAll(); Benchmark::RunAll();
return 0; return 0;
} }

View File

@ -10,10 +10,25 @@ using std::tr1::unordered_map;
namespace cxxmph { namespace cxxmph {
// Looks up k in an unordered_map keyed by uint64_t and returns the mapped
// value. The iterator returned by find() is dereferenced without being
// compared against end(), so k must already be present in mymap.
uint64_t myfind(const unordered_map<uint64_t, uint64_t, Murmur2>& mymap, const uint64_t& k) {
  const auto hit = mymap.find(k);
  return hit->second;
}
// mph_map overload for uint64_t keys: returns whatever mymap.index(k) yields,
// widened to uint64_t, rather than the mapped value. No key comparison is
// done by this overload.
uint64_t myfind(const mph_map<uint64_t, uint64_t>& mymap, const uint64_t& k) {
  const auto slot = mymap.index(k);
  return slot;
}
// Looks up k in an unordered_map keyed by StringPiece and returns a reference
// to the mapped value stored inside mymap. The result of find() is
// dereferenced without an end() check, so k must be present in mymap.
const StringPiece& myfind(const unordered_map<StringPiece, StringPiece, Murmur2StringPiece>& mymap, const StringPiece& k) {
  const auto hit = mymap.find(k);
  return hit->second;
}
// mph_map overload for StringPiece keys: returns a copy of the mapped value
// for k. The iterator from find() is dereferenced without an end() check, so
// k must be present in mymap.
StringPiece myfind(const mph_map<StringPiece, StringPiece>& mymap, const StringPiece& k) {
  return mymap.find(k)->second;
}
template <class MapType> template <class MapType>
class BM_MapCreate : public UrlsBenchmark { class BM_CreateUrls : public UrlsBenchmark {
public: public:
BM_MapCreate(const string& urls_file) : UrlsBenchmark(urls_file) { } BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
virtual void Run() { virtual void Run() {
MapType mymap; MapType mymap;
for (auto it = urls_.begin(); it != urls_.end(); ++it) { for (auto it = urls_.begin(); it != urls_.end(); ++it) {
@ -23,13 +38,13 @@ class BM_MapCreate : public UrlsBenchmark {
}; };
template <class MapType> template <class MapType>
class BM_MapSearch : public SearchUrlsBenchmark { class BM_SearchUrls : public SearchUrlsBenchmark {
public: public:
BM_MapSearch(const std::string& urls_file, int nsearches) BM_SearchUrls(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches) { } : SearchUrlsBenchmark(urls_file, nsearches) { }
virtual void Run() { virtual void Run() {
for (auto it = random_.begin(); it != random_.end(); ++it) { for (auto it = random_.begin(); it != random_.end(); ++it) {
mymap_.find(*it); auto idx = myfind(mymap_, *it);
} }
} }
protected: protected:
@ -44,14 +59,40 @@ class BM_MapSearch : public SearchUrlsBenchmark {
MapType mymap_; MapType mymap_;
}; };
// Benchmark that times key lookups in a MapType whose keys and values are
// the elements of values_ (each element maps to itself). MapType must
// provide operator[], rehash(), bucket_count() and have a matching myfind()
// overload.
template <class MapType>
class BM_SearchUint64 : public SearchUint64Benchmark {
 public:
  BM_SearchUint64() : SearchUint64Benchmark(1000*1000, 1000*1000) { }

  // Runs the base-class set-up, then fills mymap_ from values_.
  // Returns false if the base-class SetUp() fails, true otherwise.
  virtual bool SetUp() {
    if (!SearchUint64Benchmark::SetUp()) return false;
    // Unsigned index so the comparison with size() does not mix signed and
    // unsigned operands (values_ is presumably a std::vector -- confirm in
    // bm_common.h).
    for (size_t i = 0; i < values_.size(); ++i) {
      mymap_[values_[i]] = values_[i];
    }
    // Done once here so the work happens before the timed Run(); for mph_map
    // this call ignores its argument and packs the map.
    mymap_.rehash(mymap_.bucket_count());
    return true;
  }

  // Performs one lookup per element of random_. The looked-up value is
  // deliberately discarded; only the lookup itself is of interest.
  virtual void Run() {
    for (auto it = random_.begin(); it != random_.end(); ++it) {
      auto v = myfind(mymap_, *it);
      (void)v;  // result intentionally unused
    }
  }

  MapType mymap_;
};
} // namespace cxxmph } // namespace cxxmph
using namespace cxxmph; using namespace cxxmph;
int main(int argc, char** argv) { int main(int argc, char** argv) {
Benchmark::Register(new BM_MapCreate<mph_map<StringPiece, StringPiece>>("URLS100k")); /*
Benchmark::Register(new BM_MapCreate<unordered_map<StringPiece, StringPiece>>("URLS100k")); Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_MapSearch<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000)); Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_MapSearch<unordered_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000)); */
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000*100));
/*
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 1000* 1000));
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t, Murmur2>>);
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
*/
Benchmark::RunAll(); Benchmark::RunAll();
} }

View File

@ -149,6 +149,7 @@ template <class SeededHashFcn, class Key>
uint32_t MPHIndex::index(const Key& key) const { uint32_t MPHIndex::index(const Key& key) const {
uint32_t h[3]; uint32_t h[3];
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]); for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
assert(r_);
h[0] = h[0] % r_; h[0] = h[0] % r_;
h[1] = h[1] % r_ + r_; h[1] = h[1] % r_ + r_;
h[2] = h[2] % r_ + (r_ << 1); h[2] = h[2] % r_ + (r_ << 1);
@ -169,7 +170,7 @@ class SimpleMPHIndex : public MPHIndex {
bool Reset(ForwardIterator begin, ForwardIterator end) { bool Reset(ForwardIterator begin, ForwardIterator end) {
return MPHIndex::Reset<HashFcn>(begin, end); return MPHIndex::Reset<HashFcn>(begin, end);
} }
uint32_t index(const Key& key) { return MPHIndex::index<HashFcn>(key); } uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
}; };
} // namespace cxxmph } // namespace cxxmph

View File

@ -24,7 +24,7 @@ int main(int argc, char** argv) {
keys.push_back("algume"); keys.push_back("algume");
SimpleMPHIndex<string> mph_index; SimpleMPHIndex<string> mph_index;
assert(mph_index.Reset(keys.begin(), keys.end())); if (!mph_index.Reset(keys.begin(), keys.end())) { exit(-1); }
vector<int> ids; vector<int> ids;
for (vector<int>::size_type i = 0; i < keys.size(); ++i) { for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
ids.push_back(mph_index.index(keys[i])); ids.push_back(mph_index.index(keys[i]));
@ -33,7 +33,6 @@ int main(int argc, char** argv) {
cerr << endl; cerr << endl;
sort(ids.begin(), ids.end()); sort(ids.begin(), ids.end());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i)); for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
char* serialized = new char[mph_index.serialize_bytes_needed()]; char* serialized = new char[mph_index.serialize_bytes_needed()];
mph_index.serialize(serialized); mph_index.serialize(serialized);
SimpleMPHIndex<string> other_mph_index; SimpleMPHIndex<string> other_mph_index;

View File

@ -52,11 +52,17 @@ class mph_map {
std::pair<iterator, bool> insert(const value_type& x); std::pair<iterator, bool> insert(const value_type& x);
iterator find(const key_type& k); iterator find(const key_type& k);
const_iterator find(const key_type& k) const; const_iterator find(const key_type& k) const;
typedef int32_t my_int32_t;
int32_t index(const key_type& k) const;
data_type& operator[](const key_type &k); data_type& operator[](const key_type &k);
const data_type& operator[](const key_type &k) const;
size_type bucket_count() const { return size(); } size_type bucket_count() const { return size(); }
void rehash(size_type nbuckets /*ignored*/) { pack(); } void rehash(size_type nbuckets /*ignored*/) { pack(); }
protected: // mimicking STL implementation
EqualKey equal_;
private: private:
template <typename iterator> template <typename iterator>
struct iterator_first : public iterator { struct iterator_first : public iterator {
@ -145,30 +151,33 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
} }
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const { MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
if (!slack_.empty()) { if (__builtin_expect(!slack_.empty(), 0)) {
typename slack_type::const_iterator it = slack_.find(k); typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second; if (it != slack_.end()) return values_.begin() + it->second;
} }
if (index_.size() == 0) return end(); if (__builtin_expect(index_.size() == 0, 0)) return end();
size_type id = index_.index(k); auto it = values_.begin() + index_.index(k);
if (key_equal()(values_[id].first, k)) { if (__builtin_expect(equal_(k, it->first), 1)) return it;
return values_.begin() + id;
}
return end(); return end();
} }
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) { MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
if (!slack_.empty()) { if (!slack_.empty()) {
typename slack_type::const_iterator it = slack_.find(k); typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second; if (it != slack_.end()) return values_.begin() + it->second;
} }
if (index_.size() == 0) return end(); if (index_.size() == 0) return end();
size_type id = index_.index(k); auto it = values_.begin() + index_.index(k);
if (key_equal()(values_[id].first, k)) { if (equal_(it->first, k)) return it;
return values_.begin() + id;
}
return end(); return end();
} }
// Returns the position computed by index_ for k, or -1 when index_ holds no
// keys. Unlike find(), the stored key is not compared against k, and slack_
// is asserted to be empty rather than consulted.
MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
  assert(slack_.empty());
  const bool no_index = (index_.size() == 0);
  if (no_index) return -1;
  return index_.index(k);
}
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) { MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
return insert(std::make_pair(k, data_type())).first->second; return insert(std::make_pair(k, data_type())).first->second;
} }

View File

@ -19,10 +19,17 @@ struct seeded_hash_function {
} }
}; };
// Seeded "hash" that does no mixing: the key is XORed with the seed and the
// result is narrowed to 32 bits.
struct seeded_identity_function {
  template <class Key>
  uint32_t operator()(const Key& k, uint32_t seed) const {
    // Explicit cast spells out the narrowing that the return type implies
    // when Key is wider than 32 bits.
    return static_cast<uint32_t>(k ^ seed);
  }
};
struct Murmur2 { struct Murmur2 {
template<class Key> template<class Key>
uint32_t operator()(const Key& k) const { uint32_t operator()(const Key& k) const {
return MurmurHash2(k, sizeof(Key), 1 /* seed */); return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */);
} }
}; };
struct Murmur2StringPiece { struct Murmur2StringPiece {