All tests pass.
This commit is contained in:
parent
c749ab444b
commit
0846177267
@ -56,7 +56,7 @@ class SearchUint64Benchmark : public Uint64Benchmark {
|
|||||||
protected:
|
protected:
|
||||||
virtual bool SetUp();
|
virtual bool SetUp();
|
||||||
const uint32_t nsearches_;
|
const uint32_t nsearches_;
|
||||||
std::vector<uint32_t> random_;
|
std::vector<uint64_t> random_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace cxxmph
|
} // namespace cxxmph
|
||||||
|
@ -1,15 +1,15 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <tr1/unordered_set>
|
#include <tr1/unordered_map>
|
||||||
|
|
||||||
#include "bm_common.h"
|
#include "bm_common.h"
|
||||||
#include "StringPiece.h"
|
#include "stringpiece.h"
|
||||||
#include "mph_index.h"
|
#include "mph_index.h"
|
||||||
|
|
||||||
using namespace cxxmph;
|
using namespace cxxmph;
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::tr1::unordered_set;
|
using std::tr1::unordered_map;
|
||||||
|
|
||||||
class BM_MPHIndexCreate : public UrlsBenchmark {
|
class BM_MPHIndexCreate : public UrlsBenchmark {
|
||||||
public:
|
public:
|
||||||
@ -28,8 +28,11 @@ class BM_STLIndexCreate : public UrlsBenchmark {
|
|||||||
: UrlsBenchmark(urls_file) { }
|
: UrlsBenchmark(urls_file) { }
|
||||||
protected:
|
protected:
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
unordered_set<StringPiece> index;
|
unordered_map<StringPiece, uint32_t> index;
|
||||||
index.insert(urls_.begin(), urls_.end());
|
int idx = 0;
|
||||||
|
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||||
|
index.insert(make_pair(*it, idx++));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -38,10 +41,10 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
|
|||||||
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
|
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
|
||||||
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
while (true) {
|
|
||||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||||
index_.index(*it);
|
auto idx = index_.index(*it);
|
||||||
}
|
// Collision check to be fair with STL
|
||||||
|
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
@ -59,23 +62,28 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark {
|
|||||||
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||||
index_.find(*it); // - index_.begin();
|
auto idx = index_.find(*it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
virtual bool SetUp () {
|
virtual bool SetUp () {
|
||||||
if (!SearchUrlsBenchmark::SetUp()) return false;
|
if (!SearchUrlsBenchmark::SetUp()) return false;
|
||||||
std::tr1::unordered_set<StringPiece>(urls_.begin(), urls_.end()).swap(index_);
|
unordered_map<StringPiece, uint32_t> index;
|
||||||
|
int idx = 0;
|
||||||
|
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||||
|
index.insert(make_pair(*it, idx++));
|
||||||
|
}
|
||||||
|
index.swap(index_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
std::tr1::unordered_set<StringPiece> index_;
|
std::tr1::unordered_map<StringPiece, uint32_t> index_;
|
||||||
};
|
};
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
|
Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
|
||||||
Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
|
Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
|
||||||
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 1000*1000));
|
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000));
|
||||||
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 1000*1000));
|
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000));
|
||||||
Benchmark::RunAll();
|
Benchmark::RunAll();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -10,10 +10,25 @@ using std::tr1::unordered_map;
|
|||||||
|
|
||||||
namespace cxxmph {
|
namespace cxxmph {
|
||||||
|
|
||||||
|
uint64_t myfind(const unordered_map<uint64_t, uint64_t, Murmur2>& mymap, const uint64_t& k) {
|
||||||
|
return mymap.find(k)->second;
|
||||||
|
}
|
||||||
|
uint64_t myfind(const mph_map<uint64_t, uint64_t>& mymap, const uint64_t& k) {
|
||||||
|
return mymap.index(k);
|
||||||
|
}
|
||||||
|
|
||||||
|
const StringPiece& myfind(const unordered_map<StringPiece, StringPiece, Murmur2StringPiece>& mymap, const StringPiece& k) {
|
||||||
|
return mymap.find(k)->second;
|
||||||
|
}
|
||||||
|
StringPiece myfind(const mph_map<StringPiece, StringPiece>& mymap, const StringPiece& k) {
|
||||||
|
auto it = mymap.find(k);
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
template <class MapType>
|
template <class MapType>
|
||||||
class BM_MapCreate : public UrlsBenchmark {
|
class BM_CreateUrls : public UrlsBenchmark {
|
||||||
public:
|
public:
|
||||||
BM_MapCreate(const string& urls_file) : UrlsBenchmark(urls_file) { }
|
BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
MapType mymap;
|
MapType mymap;
|
||||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||||
@ -23,13 +38,13 @@ class BM_MapCreate : public UrlsBenchmark {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class MapType>
|
template <class MapType>
|
||||||
class BM_MapSearch : public SearchUrlsBenchmark {
|
class BM_SearchUrls : public SearchUrlsBenchmark {
|
||||||
public:
|
public:
|
||||||
BM_MapSearch(const std::string& urls_file, int nsearches)
|
BM_SearchUrls(const std::string& urls_file, int nsearches)
|
||||||
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
: SearchUrlsBenchmark(urls_file, nsearches) { }
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||||
mymap_.find(*it);
|
auto idx = myfind(mymap_, *it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
@ -44,14 +59,40 @@ class BM_MapSearch : public SearchUrlsBenchmark {
|
|||||||
MapType mymap_;
|
MapType mymap_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <class MapType>
|
||||||
|
class BM_SearchUint64 : public SearchUint64Benchmark {
|
||||||
|
public:
|
||||||
|
BM_SearchUint64() : SearchUint64Benchmark(1000*1000, 1000*1000) { }
|
||||||
|
virtual bool SetUp() {
|
||||||
|
if (!SearchUint64Benchmark::SetUp()) return false;
|
||||||
|
for (int i = 0; i < values_.size(); ++i) {
|
||||||
|
mymap_[values_[i]] = values_[i];
|
||||||
|
}
|
||||||
|
mymap_.rehash(mymap_.bucket_count());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
virtual void Run() {
|
||||||
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||||
|
auto v = myfind(mymap_, *it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MapType mymap_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace cxxmph
|
} // namespace cxxmph
|
||||||
|
|
||||||
using namespace cxxmph;
|
using namespace cxxmph;
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
Benchmark::Register(new BM_MapCreate<mph_map<StringPiece, StringPiece>>("URLS100k"));
|
/*
|
||||||
Benchmark::Register(new BM_MapCreate<unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
|
||||||
Benchmark::Register(new BM_MapSearch<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000));
|
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
||||||
Benchmark::Register(new BM_MapSearch<unordered_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000));
|
*/
|
||||||
|
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 1000* 1000*100));
|
||||||
|
/*
|
||||||
|
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 1000* 1000));
|
||||||
|
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t, Murmur2>>);
|
||||||
|
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
|
||||||
|
*/
|
||||||
Benchmark::RunAll();
|
Benchmark::RunAll();
|
||||||
}
|
}
|
||||||
|
@ -149,6 +149,7 @@ template <class SeededHashFcn, class Key>
|
|||||||
uint32_t MPHIndex::index(const Key& key) const {
|
uint32_t MPHIndex::index(const Key& key) const {
|
||||||
uint32_t h[3];
|
uint32_t h[3];
|
||||||
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
|
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
|
||||||
|
assert(r_);
|
||||||
h[0] = h[0] % r_;
|
h[0] = h[0] % r_;
|
||||||
h[1] = h[1] % r_ + r_;
|
h[1] = h[1] % r_ + r_;
|
||||||
h[2] = h[2] % r_ + (r_ << 1);
|
h[2] = h[2] % r_ + (r_ << 1);
|
||||||
@ -169,7 +170,7 @@ class SimpleMPHIndex : public MPHIndex {
|
|||||||
bool Reset(ForwardIterator begin, ForwardIterator end) {
|
bool Reset(ForwardIterator begin, ForwardIterator end) {
|
||||||
return MPHIndex::Reset<HashFcn>(begin, end);
|
return MPHIndex::Reset<HashFcn>(begin, end);
|
||||||
}
|
}
|
||||||
uint32_t index(const Key& key) { return MPHIndex::index<HashFcn>(key); }
|
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace cxxmph
|
} // namespace cxxmph
|
||||||
|
@ -24,7 +24,7 @@ int main(int argc, char** argv) {
|
|||||||
keys.push_back("algume");
|
keys.push_back("algume");
|
||||||
|
|
||||||
SimpleMPHIndex<string> mph_index;
|
SimpleMPHIndex<string> mph_index;
|
||||||
assert(mph_index.Reset(keys.begin(), keys.end()));
|
if (!mph_index.Reset(keys.begin(), keys.end())) { exit(-1); }
|
||||||
vector<int> ids;
|
vector<int> ids;
|
||||||
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
|
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
|
||||||
ids.push_back(mph_index.index(keys[i]));
|
ids.push_back(mph_index.index(keys[i]));
|
||||||
@ -33,7 +33,6 @@ int main(int argc, char** argv) {
|
|||||||
cerr << endl;
|
cerr << endl;
|
||||||
sort(ids.begin(), ids.end());
|
sort(ids.begin(), ids.end());
|
||||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||||
|
|
||||||
char* serialized = new char[mph_index.serialize_bytes_needed()];
|
char* serialized = new char[mph_index.serialize_bytes_needed()];
|
||||||
mph_index.serialize(serialized);
|
mph_index.serialize(serialized);
|
||||||
SimpleMPHIndex<string> other_mph_index;
|
SimpleMPHIndex<string> other_mph_index;
|
||||||
|
@ -52,11 +52,17 @@ class mph_map {
|
|||||||
std::pair<iterator, bool> insert(const value_type& x);
|
std::pair<iterator, bool> insert(const value_type& x);
|
||||||
iterator find(const key_type& k);
|
iterator find(const key_type& k);
|
||||||
const_iterator find(const key_type& k) const;
|
const_iterator find(const key_type& k) const;
|
||||||
|
typedef int32_t my_int32_t;
|
||||||
|
int32_t index(const key_type& k) const;
|
||||||
data_type& operator[](const key_type &k);
|
data_type& operator[](const key_type &k);
|
||||||
|
const data_type& operator[](const key_type &k) const;
|
||||||
|
|
||||||
size_type bucket_count() const { return size(); }
|
size_type bucket_count() const { return size(); }
|
||||||
void rehash(size_type nbuckets /*ignored*/) { pack(); }
|
void rehash(size_type nbuckets /*ignored*/) { pack(); }
|
||||||
|
|
||||||
|
protected: // mimicking STL implementation
|
||||||
|
EqualKey equal_;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename iterator>
|
template <typename iterator>
|
||||||
struct iterator_first : public iterator {
|
struct iterator_first : public iterator {
|
||||||
@ -145,30 +151,33 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
||||||
if (!slack_.empty()) {
|
if (__builtin_expect(!slack_.empty(), 0)) {
|
||||||
typename slack_type::const_iterator it = slack_.find(k);
|
typename slack_type::const_iterator it = slack_.find(k);
|
||||||
if (it != slack_.end()) return values_.begin() + it->second;
|
if (it != slack_.end()) return values_.begin() + it->second;
|
||||||
}
|
|
||||||
if (index_.size() == 0) return end();
|
|
||||||
size_type id = index_.index(k);
|
|
||||||
if (key_equal()(values_[id].first, k)) {
|
|
||||||
return values_.begin() + id;
|
|
||||||
}
|
}
|
||||||
|
if (__builtin_expect(index_.size() == 0, 0)) return end();
|
||||||
|
auto it = values_.begin() + index_.index(k);
|
||||||
|
if (__builtin_expect(equal_(k, it->first), 1)) return it;
|
||||||
return end();
|
return end();
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
||||||
if (!slack_.empty()) {
|
if (!slack_.empty()) {
|
||||||
typename slack_type::const_iterator it = slack_.find(k);
|
typename slack_type::const_iterator it = slack_.find(k);
|
||||||
if (it != slack_.end()) return values_.begin() + it->second;
|
if (it != slack_.end()) return values_.begin() + it->second;
|
||||||
}
|
}
|
||||||
if (index_.size() == 0) return end();
|
if (index_.size() == 0) return end();
|
||||||
size_type id = index_.index(k);
|
auto it = values_.begin() + index_.index(k);
|
||||||
if (key_equal()(values_[id].first, k)) {
|
if (equal_(it->first, k)) return it;
|
||||||
return values_.begin() + id;
|
|
||||||
}
|
|
||||||
return end();
|
return end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
|
||||||
|
assert(slack_.empty());
|
||||||
|
if (index_.size() == 0) return -1;
|
||||||
|
return index_.index(k);
|
||||||
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
|
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
|
||||||
return insert(std::make_pair(k, data_type())).first->second;
|
return insert(std::make_pair(k, data_type())).first->second;
|
||||||
}
|
}
|
||||||
|
@ -19,10 +19,17 @@ struct seeded_hash_function {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct seeded_identity_function {
|
||||||
|
template <class Key>
|
||||||
|
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||||
|
return k ^ seed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct Murmur2 {
|
struct Murmur2 {
|
||||||
template<class Key>
|
template<class Key>
|
||||||
uint32_t operator()(const Key& k) const {
|
uint32_t operator()(const Key& k) const {
|
||||||
return MurmurHash2(k, sizeof(Key), 1 /* seed */);
|
return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct Murmur2StringPiece {
|
struct Murmur2StringPiece {
|
||||||
|
Loading…
Reference in New Issue
Block a user