All tests pass.

This commit is contained in:
Davi de Castro Reis 2011-06-14 02:24:40 -03:00
parent c749ab444b
commit 0846177267
7 changed files with 106 additions and 41 deletions

View File

@ -56,7 +56,7 @@ class SearchUint64Benchmark : public Uint64Benchmark {
protected:
virtual bool SetUp();
const uint32_t nsearches_;
std::vector<uint32_t> random_;
std::vector<uint64_t> random_;
};
} // namespace cxxmph

View File

@ -1,15 +1,15 @@
#include <set>
#include <string>
#include <tr1/unordered_set>
#include <tr1/unordered_map>
#include "bm_common.h"
#include "StringPiece.h"
#include "stringpiece.h"
#include "mph_index.h"
using namespace cxxmph;
using std::string;
using std::tr1::unordered_set;
using std::tr1::unordered_map;
class BM_MPHIndexCreate : public UrlsBenchmark {
public:
@ -28,8 +28,11 @@ class BM_STLIndexCreate : public UrlsBenchmark {
: UrlsBenchmark(urls_file) { }
protected:
virtual void Run() {
unordered_set<StringPiece> index;
index.insert(urls_.begin(), urls_.end());
unordered_map<StringPiece, uint32_t> index;
int idx = 0;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
index.insert(make_pair(*it, idx++));
}
}
};
@ -38,10 +41,10 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches) { }
virtual void Run() {
while (true) {
for (auto it = random_.begin(); it != random_.end(); ++it) {
index_.index(*it);
}
auto idx = index_.index(*it);
// Collision check to be fair with STL
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
}
}
protected:
@ -59,23 +62,28 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark {
: SearchUrlsBenchmark(urls_file, nsearches) { }
virtual void Run() {
for (auto it = random_.begin(); it != random_.end(); ++it) {
index_.find(*it); // - index_.begin();
auto idx = index_.find(*it);
}
}
protected:
virtual bool SetUp () {
if (!SearchUrlsBenchmark::SetUp()) return false;
std::tr1::unordered_set<StringPiece>(urls_.begin(), urls_.end()).swap(index_);
unordered_map<StringPiece, uint32_t> index;
int idx = 0;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
index.insert(make_pair(*it, idx++));
}
index.swap(index_);
return true;
}
std::tr1::unordered_set<StringPiece> index_;
std::tr1::unordered_map<StringPiece, uint32_t> index_;
};
// Entry point: registers the MPH-index and STL create/search benchmarks over the
// "URLS100k" input and then runs everything that was registered.
// For the search benchmarks the second constructor argument is the number of searches.
// NOTE(review): the benchmark objects are allocated with new and never deleted here —
// presumably Benchmark::Register takes ownership; confirm.
int main(int argc, char** argv) {
Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 1000*1000));
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 1000*1000));
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000));
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000));
Benchmark::RunAll();
return 0;
}

View File

@ -10,10 +10,25 @@ using std::tr1::unordered_map;
namespace cxxmph {
// Lookup helper for a Murmur2-hashed unordered_map with uint64_t keys.
// Returns the mapped value for k. The iterator is dereferenced without an
// end() check, so k must be present in mymap.
uint64_t myfind(const unordered_map<uint64_t, uint64_t, Murmur2>& mymap, const uint64_t& k) {
  const auto found = mymap.find(k);
  return found->second;
}
// Lookup helper for mph_map with uint64_t keys.
// Unlike the unordered_map overload, this returns the result of mymap.index(k)
// (converted to uint64_t) rather than the mapped value.
uint64_t myfind(const mph_map<uint64_t, uint64_t>& mymap, const uint64_t& k) {
  const auto slot = mymap.index(k);
  return slot;
}
// Lookup helper for a Murmur2StringPiece-hashed unordered_map keyed by StringPiece.
// Returns a reference to the mapped value stored in mymap. The iterator is
// dereferenced without an end() check, so k must be present in mymap.
const StringPiece& myfind(const unordered_map<StringPiece, StringPiece, Murmur2StringPiece>& mymap, const StringPiece& k) {
  const auto found = mymap.find(k);
  return found->second;
}
// Lookup helper for mph_map keyed by StringPiece.
// Returns a copy of the mapped value for k. The result of find() is
// dereferenced without an end() check, so k must be present in mymap.
StringPiece myfind(const mph_map<StringPiece, StringPiece>& mymap, const StringPiece& k) {
  return mymap.find(k)->second;
}
template <class MapType>
class BM_MapCreate : public UrlsBenchmark {
class BM_CreateUrls : public UrlsBenchmark {
public:
BM_MapCreate(const string& urls_file) : UrlsBenchmark(urls_file) { }
BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
virtual void Run() {
MapType mymap;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
@ -23,13 +38,13 @@ class BM_MapCreate : public UrlsBenchmark {
};
template <class MapType>
class BM_MapSearch : public SearchUrlsBenchmark {
class BM_SearchUrls : public SearchUrlsBenchmark {
public:
BM_MapSearch(const std::string& urls_file, int nsearches)
BM_SearchUrls(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches) { }
virtual void Run() {
for (auto it = random_.begin(); it != random_.end(); ++it) {
mymap_.find(*it);
auto idx = myfind(mymap_, *it);
}
}
protected:
@ -44,14 +59,40 @@ class BM_MapSearch : public SearchUrlsBenchmark {
MapType mymap_;
};
// Benchmark of lookups in a MapType keyed by the base class's uint64 values.
// The base class is constructed with (1000*1000, 1000*1000). SetUp() loads
// every entry of values_ into mymap_ with key == value, and Run() looks up
// each entry of random_ through the myfind() overload matching MapType.
template <class MapType>
class BM_SearchUint64 : public SearchUint64Benchmark {
 public:
  BM_SearchUint64() : SearchUint64Benchmark(1000*1000, 1000*1000) { }

  // Populates mymap_ from values_.
  // Returns false if the base-class SetUp() fails, true otherwise.
  virtual bool SetUp() {
    if (!SearchUint64Benchmark::SetUp()) return false;
    // Iterate instead of comparing a signed int index against size(), which
    // was a signed/unsigned mix. Insertion order is unchanged.
    for (auto it = values_.begin(); it != values_.end(); ++it) {
      mymap_[*it] = *it;
    }
    // Rehash to the current bucket count once all keys are in. For mph_map,
    // rehash() ignores its argument and calls pack().
    mymap_.rehash(mymap_.bucket_count());
    return true;
  }

  // Performs one lookup per entry of random_.
  // NOTE(review): the lookup result is never used, so an optimizing compiler
  // may be able to drop part of the work — confirm the timing is meaningful.
  virtual void Run() {
    for (auto it = random_.begin(); it != random_.end(); ++it) {
      auto v = myfind(mymap_, *it);
    }
  }

  MapType mymap_;
};
} // namespace cxxmph
using namespace cxxmph;
// Entry point: registers the map benchmarks listed below and runs them with
// Benchmark::RunAll(). Registrations wrapped in /* ... */ are disabled.
// The string argument is the URL input file; for the search benchmarks the
// second argument is the number of searches.
// NOTE(review): the benchmark objects are allocated with new and never deleted here —
// presumably Benchmark::Register takes ownership; confirm.
int main(int argc, char** argv) {
Benchmark::Register(new BM_MapCreate<mph_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_MapCreate<unordered_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_MapSearch<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000));
Benchmark::Register(new BM_MapSearch<unordered_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000));
/*
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
*/
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000*100));
/*
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 1000* 1000));
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t, Murmur2>>);
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
*/
Benchmark::RunAll();
}

View File

@ -149,6 +149,7 @@ template <class SeededHashFcn, class Key>
uint32_t MPHIndex::index(const Key& key) const {
uint32_t h[3];
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
assert(r_);
h[0] = h[0] % r_;
h[1] = h[1] % r_ + r_;
h[2] = h[2] % r_ + (r_ << 1);
@ -169,7 +170,7 @@ class SimpleMPHIndex : public MPHIndex {
bool Reset(ForwardIterator begin, ForwardIterator end) {
return MPHIndex::Reset<HashFcn>(begin, end);
}
uint32_t index(const Key& key) { return MPHIndex::index<HashFcn>(key); }
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
};
} // namespace cxxmph

View File

@ -24,7 +24,7 @@ int main(int argc, char** argv) {
keys.push_back("algume");
SimpleMPHIndex<string> mph_index;
assert(mph_index.Reset(keys.begin(), keys.end()));
if (!mph_index.Reset(keys.begin(), keys.end())) { exit(-1); }
vector<int> ids;
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
ids.push_back(mph_index.index(keys[i]));
@ -33,7 +33,6 @@ int main(int argc, char** argv) {
cerr << endl;
sort(ids.begin(), ids.end());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
char* serialized = new char[mph_index.serialize_bytes_needed()];
mph_index.serialize(serialized);
SimpleMPHIndex<string> other_mph_index;

View File

@ -52,11 +52,17 @@ class mph_map {
std::pair<iterator, bool> insert(const value_type& x);
iterator find(const key_type& k);
const_iterator find(const key_type& k) const;
typedef int32_t my_int32_t;
int32_t index(const key_type& k) const;
data_type& operator[](const key_type &k);
const data_type& operator[](const key_type &k) const;
size_type bucket_count() const { return size(); }
void rehash(size_type nbuckets /*ignored*/) { pack(); }
protected: // mimicking STL implementation
EqualKey equal_;
private:
template <typename iterator>
struct iterator_first : public iterator {
@ -145,30 +151,33 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
}
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
if (!slack_.empty()) {
typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second;
}
if (index_.size() == 0) return end();
size_type id = index_.index(k);
if (key_equal()(values_[id].first, k)) {
return values_.begin() + id;
if (__builtin_expect(!slack_.empty(), 0)) {
typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second;
}
if (__builtin_expect(index_.size() == 0, 0)) return end();
auto it = values_.begin() + index_.index(k);
if (__builtin_expect(equal_(k, it->first), 1)) return it;
return end();
}
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
if (!slack_.empty()) {
typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second;
typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second;
}
if (index_.size() == 0) return end();
size_type id = index_.index(k);
if (key_equal()(values_[id].first, k)) {
return values_.begin() + id;
}
auto it = values_.begin() + index_.index(k);
if (equal_(it->first, k)) return it;
return end();
}
// Returns the position index_ computes for k, or -1 when index_ is empty.
// Asserts that slack_ is empty. The stored key is not compared against k
// here, so callers that need an exact match must check it themselves.
MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
  assert(slack_.empty());
  if (index_.size() != 0) return index_.index(k);
  return -1;
}
// Subscript access: calls insert() with (k, data_type()) and returns a
// reference to the mapped value at the position insert() reports.
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
  auto result = insert(std::make_pair(k, data_type()));
  return result.first->second;
}

View File

@ -19,10 +19,17 @@ struct seeded_hash_function {
}
};
// Seeded "hash" that simply XORs the key with the seed.
// The XOR is evaluated in the promoted type of Key and uint32_t, and the
// result is then narrowed to 32 bits, so for keys wider than 32 bits only
// the low 32 bits of the XOR are returned.
struct seeded_identity_function {
  template <class Key>
  uint32_t operator()(const Key& k, uint32_t seed) const {
    const auto mixed = k ^ seed;
    return static_cast<uint32_t>(mixed);
  }
};
// Unseeded hash functor: runs MurmurHash2 with a fixed seed of 1 over the
// sizeof(Key) bytes of the key object.
// NOTE(review): because the raw bytes of k are hashed, this presumably is only
// meaningful for keys whose value lives entirely in those bytes (e.g. integers);
// confirm before using it with types that hold pointers or padding.
struct Murmur2 {
  template<class Key>
  uint32_t operator()(const Key& k) const {
    // Hand MurmurHash2 the address of the key, not the key itself: the bytes
    // at that address are what get hashed.
    return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */);
  }
};
struct Murmur2StringPiece {