All tests pass.
This commit is contained in:
parent
c749ab444b
commit
0846177267
@ -56,7 +56,7 @@ class SearchUint64Benchmark : public Uint64Benchmark {
|
||||
protected:
|
||||
virtual bool SetUp();
|
||||
const uint32_t nsearches_;
|
||||
std::vector<uint32_t> random_;
|
||||
std::vector<uint64_t> random_;
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -1,15 +1,15 @@
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tr1/unordered_set>
|
||||
#include <tr1/unordered_map>
|
||||
|
||||
#include "bm_common.h"
|
||||
#include "StringPiece.h"
|
||||
#include "stringpiece.h"
|
||||
#include "mph_index.h"
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
using std::string;
|
||||
using std::tr1::unordered_set;
|
||||
using std::tr1::unordered_map;
|
||||
|
||||
class BM_MPHIndexCreate : public UrlsBenchmark {
|
||||
public:
|
||||
@ -28,8 +28,11 @@ class BM_STLIndexCreate : public UrlsBenchmark {
|
||||
: UrlsBenchmark(urls_file) { }
|
||||
protected:
|
||||
virtual void Run() {
|
||||
unordered_set<StringPiece> index;
|
||||
index.insert(urls_.begin(), urls_.end());
|
||||
unordered_map<StringPiece, uint32_t> index;
|
||||
int idx = 0;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
index.insert(make_pair(*it, idx++));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -38,10 +41,10 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
|
||||
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
|
||||
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
||||
virtual void Run() {
|
||||
while (true) {
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
index_.index(*it);
|
||||
}
|
||||
auto idx = index_.index(*it);
|
||||
// Collision check to be fair with STL
|
||||
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
|
||||
}
|
||||
}
|
||||
protected:
|
||||
@ -59,23 +62,28 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark {
|
||||
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
||||
virtual void Run() {
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
index_.find(*it); // - index_.begin();
|
||||
auto idx = index_.find(*it);
|
||||
}
|
||||
}
|
||||
protected:
|
||||
virtual bool SetUp () {
|
||||
if (!SearchUrlsBenchmark::SetUp()) return false;
|
||||
std::tr1::unordered_set<StringPiece>(urls_.begin(), urls_.end()).swap(index_);
|
||||
unordered_map<StringPiece, uint32_t> index;
|
||||
int idx = 0;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
index.insert(make_pair(*it, idx++));
|
||||
}
|
||||
index.swap(index_);
|
||||
return true;
|
||||
}
|
||||
std::tr1::unordered_set<StringPiece> index_;
|
||||
std::tr1::unordered_map<StringPiece, uint32_t> index_;
|
||||
};
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
|
||||
Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
|
||||
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 1000*1000));
|
||||
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 1000*1000));
|
||||
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000));
|
||||
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000));
|
||||
Benchmark::RunAll();
|
||||
return 0;
|
||||
}
|
||||
|
@ -10,10 +10,25 @@ using std::tr1::unordered_map;
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Benchmark lookup helper, unordered_map<uint64_t, uint64_t, Murmur2> overload.
// Returns the value mapped to `k`.
// The iterator returned by find() is dereferenced without being compared to
// end(), so the caller must pass a key that is present in `mymap`.
uint64_t myfind(const unordered_map<uint64_t, uint64_t, Murmur2>& mymap, const uint64_t& k) {
|
||||
return mymap.find(k)->second;
|
||||
}
|
||||
// Benchmark lookup helper, mph_map<uint64_t, uint64_t> overload.
// Returns whatever mymap.index(k) yields, converted to uint64_t. It does not
// go through find() and does not read the mapped value.
// NOTE(review): presumably index() is the key's slot number rather than its
// mapped value - confirm this is the intended thing to measure.
uint64_t myfind(const mph_map<uint64_t, uint64_t>& mymap, const uint64_t& k) {
|
||||
return mymap.index(k);
|
||||
}
|
||||
|
||||
// Benchmark lookup helper, unordered_map<StringPiece, StringPiece,
// Murmur2StringPiece> overload.
// Returns a const reference to the mapped StringPiece inside `mymap`.
// The iterator returned by find() is dereferenced without being compared to
// end(), so the caller must pass a key that is present in `mymap`.
const StringPiece& myfind(const unordered_map<StringPiece, StringPiece, Murmur2StringPiece>& mymap, const StringPiece& k) {
|
||||
return mymap.find(k)->second;
|
||||
}
|
||||
// Benchmark lookup helper, mph_map<StringPiece, StringPiece> overload.
// Returns, by value, the StringPiece mapped to `k`.
// The iterator returned by find() is dereferenced without being compared to
// end(), so the caller must pass a key that is present in `mymap`.
StringPiece myfind(const mph_map<StringPiece, StringPiece>& mymap, const StringPiece& k) {
|
||||
auto it = mymap.find(k);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <class MapType>
|
||||
class BM_MapCreate : public UrlsBenchmark {
|
||||
class BM_CreateUrls : public UrlsBenchmark {
|
||||
public:
|
||||
BM_MapCreate(const string& urls_file) : UrlsBenchmark(urls_file) { }
|
||||
BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
|
||||
virtual void Run() {
|
||||
MapType mymap;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
@ -23,13 +38,13 @@ class BM_MapCreate : public UrlsBenchmark {
|
||||
};
|
||||
|
||||
template <class MapType>
|
||||
class BM_MapSearch : public SearchUrlsBenchmark {
|
||||
class BM_SearchUrls : public SearchUrlsBenchmark {
|
||||
public:
|
||||
BM_MapSearch(const std::string& urls_file, int nsearches)
|
||||
BM_SearchUrls(const std::string& urls_file, int nsearches)
|
||||
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
||||
virtual void Run() {
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
mymap_.find(*it);
|
||||
auto idx = myfind(mymap_, *it);
|
||||
}
|
||||
}
|
||||
protected:
|
||||
@ -44,14 +59,40 @@ class BM_MapSearch : public SearchUrlsBenchmark {
|
||||
MapType mymap_;
|
||||
};
|
||||
|
||||
// Search benchmark over a map type supplied as MapType.
// SetUp() fills mymap_ with values_[i] -> values_[i]; Run() calls myfind() on
// mymap_ once for every element of random_. values_ and random_ are not
// declared in this class.
template <class MapType>
|
||||
class BM_SearchUint64 : public SearchUint64Benchmark {
|
||||
public:
|
||||
// NOTE(review): the meaning of the two 1000*1000 base-class arguments is not
// visible here - presumably a key count and a search count; confirm against
// SearchUint64Benchmark.
BM_SearchUint64() : SearchUint64Benchmark(1000*1000, 1000*1000) { }
|
||||
// Runs the base-class SetUp() first and returns false if it fails; otherwise
// populates mymap_ and returns true.
virtual bool SetUp() {
|
||||
if (!SearchUint64Benchmark::SetUp()) return false;
|
||||
// NOTE(review): `i` is a signed int compared against values_.size(), which is
// presumably unsigned - confirm.
for (int i = 0; i < values_.size(); ++i) {
|
||||
mymap_[values_[i]] = values_[i];
|
||||
}
|
||||
// Asks the map to rehash to its current bucket count.
// NOTE(review): presumably this is here to make the map finalize its layout
// before the timed Run() - confirm.
mymap_.rehash(mymap_.bucket_count());
|
||||
return true;
|
||||
}
|
||||
// Performs one myfind() lookup per element of random_.
virtual void Run() {
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
// NOTE(review): `v` is never read afterwards; check that the compiler cannot
// discard the lookup being benchmarked.
auto v = myfind(mymap_, *it);
|
||||
}
|
||||
}
|
||||
// Map under test; filled by SetUp() and read by Run().
MapType mymap_;
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
Benchmark::Register(new BM_MapCreate<mph_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_MapCreate<unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_MapSearch<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000));
|
||||
Benchmark::Register(new BM_MapSearch<unordered_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000));
|
||||
/*
|
||||
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
*/
|
||||
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000*100));
|
||||
/*
|
||||
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 1000* 1000));
|
||||
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t, Murmur2>>);
|
||||
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
|
||||
*/
|
||||
Benchmark::RunAll();
|
||||
}
|
||||
|
@ -149,6 +149,7 @@ template <class SeededHashFcn, class Key>
|
||||
uint32_t MPHIndex::index(const Key& key) const {
|
||||
uint32_t h[3];
|
||||
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
|
||||
assert(r_);
|
||||
h[0] = h[0] % r_;
|
||||
h[1] = h[1] % r_ + r_;
|
||||
h[2] = h[2] % r_ + (r_ << 1);
|
||||
@ -169,7 +170,7 @@ class SimpleMPHIndex : public MPHIndex {
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
return MPHIndex::Reset<HashFcn>(begin, end);
|
||||
}
|
||||
uint32_t index(const Key& key) { return MPHIndex::index<HashFcn>(key); }
|
||||
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -24,7 +24,7 @@ int main(int argc, char** argv) {
|
||||
keys.push_back("algume");
|
||||
|
||||
SimpleMPHIndex<string> mph_index;
|
||||
assert(mph_index.Reset(keys.begin(), keys.end()));
|
||||
if (!mph_index.Reset(keys.begin(), keys.end())) { exit(-1); }
|
||||
vector<int> ids;
|
||||
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
|
||||
ids.push_back(mph_index.index(keys[i]));
|
||||
@ -33,7 +33,6 @@ int main(int argc, char** argv) {
|
||||
cerr << endl;
|
||||
sort(ids.begin(), ids.end());
|
||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||
|
||||
char* serialized = new char[mph_index.serialize_bytes_needed()];
|
||||
mph_index.serialize(serialized);
|
||||
SimpleMPHIndex<string> other_mph_index;
|
||||
|
@ -52,11 +52,17 @@ class mph_map {
|
||||
std::pair<iterator, bool> insert(const value_type& x);
|
||||
iterator find(const key_type& k);
|
||||
const_iterator find(const key_type& k) const;
|
||||
typedef int32_t my_int32_t;
|
||||
int32_t index(const key_type& k) const;
|
||||
data_type& operator[](const key_type &k);
|
||||
const data_type& operator[](const key_type &k) const;
|
||||
|
||||
size_type bucket_count() const { return size(); }
|
||||
void rehash(size_type nbuckets /*ignored*/) { pack(); }
|
||||
|
||||
protected: // mimicking STL implementation
|
||||
EqualKey equal_;
|
||||
|
||||
private:
|
||||
template <typename iterator>
|
||||
struct iterator_first : public iterator {
|
||||
@ -145,30 +151,33 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
|
||||
}
|
||||
|
||||
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
||||
if (!slack_.empty()) {
|
||||
typename slack_type::const_iterator it = slack_.find(k);
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
}
|
||||
if (index_.size() == 0) return end();
|
||||
size_type id = index_.index(k);
|
||||
if (key_equal()(values_[id].first, k)) {
|
||||
return values_.begin() + id;
|
||||
if (__builtin_expect(!slack_.empty(), 0)) {
|
||||
typename slack_type::const_iterator it = slack_.find(k);
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
}
|
||||
if (__builtin_expect(index_.size() == 0, 0)) return end();
|
||||
auto it = values_.begin() + index_.index(k);
|
||||
if (__builtin_expect(equal_(k, it->first), 1)) return it;
|
||||
return end();
|
||||
}
|
||||
|
||||
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
||||
if (!slack_.empty()) {
|
||||
typename slack_type::const_iterator it = slack_.find(k);
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
typename slack_type::const_iterator it = slack_.find(k);
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
}
|
||||
if (index_.size() == 0) return end();
|
||||
size_type id = index_.index(k);
|
||||
if (key_equal()(values_[id].first, k)) {
|
||||
return values_.begin() + id;
|
||||
}
|
||||
auto it = values_.begin() + index_.index(k);
|
||||
if (equal_(it->first, k)) return it;
|
||||
return end();
|
||||
}
|
||||
|
||||
// Returns index_.index(k) for key `k`, or -1 when index_ has size 0.
// Asserts that slack_ is empty before doing the lookup.
// No key-equality check is made on the result, so the return value by itself
// does not show that `k` is actually stored in the map.
MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
|
||||
assert(slack_.empty());
|
||||
if (index_.size() == 0) return -1;
|
||||
return index_.index(k);
|
||||
}
|
||||
|
||||
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
|
||||
return insert(std::make_pair(k, data_type())).first->second;
|
||||
}
|
||||
|
@ -19,10 +19,17 @@ struct seeded_hash_function {
|
||||
}
|
||||
};
|
||||
|
||||
// Seeded hash functor that does no mixing: the result is the key XOR-ed with
// the seed. Key must support `k ^ seed` with a uint32_t seed.
struct seeded_identity_function {
|
||||
template <class Key>
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
// The XOR result is converted to uint32_t on return, so for an unsigned
// integer Key wider than 32 bits only the low 32 bits are kept.
return k ^ seed;
|
||||
}
|
||||
};
|
||||
|
||||
struct Murmur2 {
|
||||
template<class Key>
|
||||
uint32_t operator()(const Key& k) const {
|
||||
return MurmurHash2(k, sizeof(Key), 1 /* seed */);
|
||||
return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */);
|
||||
}
|
||||
};
|
||||
struct Murmur2StringPiece {
|
||||
|
Loading…
Reference in New Issue
Block a user