diff --git a/cxxmph/bm_common.cc b/cxxmph/bm_common.cc
new file mode 100644
index 0000000..f0e0336
--- /dev/null
+++ b/cxxmph/bm_common.cc
@@ -0,0 +1,84 @@
+#include <fstream>
+#include <iostream>
+#include <set>
+
+#include "bm_common.h"
+
+using std::cerr;
+using std::endl;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace cxxmph {
+
+// Reads one key per line from urls_file_ into urls_.
+// Returns false, after reporting on stderr, when the file cannot be opened or
+// holds repeated keys; urls_ is replaced only on success.
+bool UrlsBenchmark::SetUp() {
+  vector<string> urls;
+  std::ifstream f(urls_file_.c_str());
+  if (!f.is_open()) {
+    cerr << "Failed to open urls file " << urls_file_ << endl;
+    return false;
+  }
+  string buffer;
+  while (std::getline(f, buffer)) urls.push_back(buffer);
+  set<string> unique(urls.begin(), urls.end());
+  if (unique.size() != urls.size()) {
+    cerr << "Input file has repeated keys." << endl;
+    return false;
+  }
+  urls.swap(urls_);
+  return true;
+}
+
+// Loads the urls, then fills random_ with nsearches_ keys drawn from urls_
+// with replacement. Fails when searches are requested but no key was loaded.
+bool SearchUrlsBenchmark::SetUp() {
+  if (!UrlsBenchmark::SetUp()) return false;
+  if (urls_.empty() && nsearches_ > 0) {
+    // Without this guard the modulo below would divide by zero.
+    cerr << "Urls file " << urls_file_ << " has no keys." << endl;
+    return false;
+  }
+  random_.resize(nsearches_);
+  for (uint32_t i = 0; i < nsearches_; ++i) {
+    random_[i] = urls_[random() % urls_.size()];
+  }
+  return true;
+}
+
+// Fills values_ with count_ distinct values produced by random().
+bool Uint64Benchmark::SetUp() {
+  set<uint64_t> unique;
+  for (uint32_t i = 0; i < count_; ++i) {
+    uint64_t v;
+    // insert() reports whether v was new, so one lookup both rejects a
+    // repeated draw and records an accepted one.
+    do { v = random(); } while (!unique.insert(v).second);
+    values_.push_back(v);
+  }
+  return true;
+}
+
+// Generates the values, then fills random_ with nsearches_ entries drawn from
+// values_ with replacement. Fails when searches are requested but values_ is
+// empty.
+bool SearchUint64Benchmark::SetUp() {
+  if (!Uint64Benchmark::SetUp()) return false;
+  if (values_.empty() && nsearches_ > 0) {
+    // Without this guard the modulo below would divide by zero.
+    cerr << "No values available to search." << endl;
+    return false;
+  }
+  random_.resize(nsearches_);
+  for (uint32_t i = 0; i < nsearches_; ++i) {
+    // Assign in place: push_back() after resize() appended the samples behind
+    // nsearches_ zeroed entries and doubled the size of random_.
+    random_[i] = values_[random() % values_.size()];
+  }
+  return true;
+}
+
+} // namespace cxxmph
diff --git a/cxxmph/bm_common.h b/cxxmph/bm_common.h
new file mode 100644
index 0000000..70a5f5e
--- /dev/null
+++ b/cxxmph/bm_common.h
@@ -0,0 +1,72 @@
+#ifndef CXXMPH_BM_COMMON_H_
+#define CXXMPH_BM_COMMON_H_
+
+#include "stringpiece.h"
+
+#include <string>
+#include <vector>
+#include <tr1/unordered_map> // for std::tr1::hash
+#include "MurmurHash2.h"
+
+#include "benchmark.h"
+
+namespace std {
+namespace tr1 {
+// Lets std::tr1 hash containers use StringPiece keys, hashing via MurmurHash2.
+template <> struct hash<cxxmph::StringPiece> {
+  uint32_t operator()(const cxxmph::StringPiece& k) const {
+    return cxxmph::MurmurHash2(k.data(), k.length(), 1);
+  }
+};
+} // namespace tr1
+} // namespace std
+
+namespace cxxmph {
+
+// Benchmark fixture whose SetUp() loads unique keys, one per line, from a file.
+class UrlsBenchmark : public Benchmark {
+ public:
+  UrlsBenchmark(const std::string& urls_file) : urls_file_(urls_file) { }
+ protected:
+  virtual bool SetUp();
+  const std::string urls_file_;
+  std::vector<std::string> urls_;
+};
+
+// UrlsBenchmark that also prepares nsearches keys sampled from the loaded urls.
+class SearchUrlsBenchmark : public UrlsBenchmark {
+ public:
+  SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches)
+      : UrlsBenchmark(urls_file), nsearches_(nsearches) {}
+ protected:
+  virtual bool SetUp();
+  const uint32_t nsearches_;
+  std::vector<StringPiece> random_;
+};
+
+// Benchmark fixture whose SetUp() generates count distinct 64-bit values.
+class Uint64Benchmark : public Benchmark {
+ public:
+  Uint64Benchmark(uint32_t count) : count_(count) { }
+  virtual void Run() {}
+ protected:
+  virtual bool SetUp();
+  const uint32_t count_;
+  std::vector<uint64_t> values_;
+};
+
+// Uint64Benchmark that also prepares nsearches values sampled from values_.
+class SearchUint64Benchmark : public Uint64Benchmark {
+ public:
+  SearchUint64Benchmark(uint32_t count, uint32_t nsearches)
+      : Uint64Benchmark(count), nsearches_(nsearches) { }
+  virtual void Run() {}
+ protected:
+  virtual bool SetUp();
+  const uint32_t nsearches_;
+  std::vector<uint64_t> random_;
+};
+
+} // namespace cxxmph
+
+#endif // CXXMPH_BM_COMMON_H_
diff --git a/cxxmph/bm_index.cc b/cxxmph/bm_index.cc
new file mode 100644
index 0000000..03cb222
--- /dev/null
+++ b/cxxmph/bm_index.cc
@@ -0,0 +1,86 @@
+#include <set>
+#include <string>
+#include <tr1/unordered_set>
+
+#include "bm_common.h"
+#include "stringpiece.h"
+#include "mph_index.h"
+
+using namespace cxxmph;
+
+using std::string;
+using std::tr1::unordered_set;
+
+// Run() builds a SimpleMPHIndex over the loaded urls.
+class BM_MPHIndexCreate : public UrlsBenchmark {
+ public:
+  BM_MPHIndexCreate(const std::string& urls_file)
+      : UrlsBenchmark(urls_file) { }
+ protected:
+  virtual void Run() {
+    SimpleMPHIndex<StringPiece> index;
+    index.Reset(urls_.begin(), urls_.end());
+  }
+};
+
+// Run() builds a std::tr1::unordered_set over the loaded urls.
+class BM_STLIndexCreate : public UrlsBenchmark {
+ public:
+  BM_STLIndexCreate(const std::string& urls_file)
+      : UrlsBenchmark(urls_file) { }
+ protected:
+  virtual void Run() {
+    unordered_set<StringPiece> index;
+    index.insert(urls_.begin(), urls_.end());
+  }
+};
+
+// Run() looks up every sampled key in a SimpleMPHIndex built during SetUp().
+class BM_MPHIndexSearch : public SearchUrlsBenchmark {
+ public:
+  BM_MPHIndexSearch(const std::string& urls_file, uint32_t nsearches)
+      : SearchUrlsBenchmark(urls_file, nsearches) { }
+  virtual void Run() {
+    // A single pass over the samples, as in BM_STLIndexSearch; the former
+    // while (true) wrapper made Run() never return.
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      index_.index(*it);
+    }
+  }
+ protected:
+  virtual bool SetUp() {
+    if (!SearchUrlsBenchmark::SetUp()) return false;
+    index_.Reset(urls_.begin(), urls_.end());
+    return true;
+  }
+  SimpleMPHIndex<StringPiece> index_;
+};
+
+// Run() looks up every sampled key in an unordered_set built during SetUp().
+class BM_STLIndexSearch : public SearchUrlsBenchmark {
+ public:
+  BM_STLIndexSearch(const std::string& urls_file, uint32_t nsearches)
+      : SearchUrlsBenchmark(urls_file, nsearches) { }
+  virtual void Run() {
+    for (auto it = random_.begin(); it != random_.end(); ++it) {
+      index_.find(*it);
+    }
+  }
+ protected:
+  virtual bool SetUp() {
+    if (!SearchUrlsBenchmark::SetUp()) return false;
+    std::tr1::unordered_set<StringPiece>(urls_.begin(), urls_.end()).swap(index_);
+    return true;
+  }
+  std::tr1::unordered_set<StringPiece> index_;
+};
+
+// Registers the index creation and search benchmarks and runs them all.
+int main(int argc, char** argv) {
+  Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
+  Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
+  Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 1000*1000));
+  Benchmark::Register(new BM_STLIndexSearch("URLS100k", 1000*1000));
+  Benchmark::RunAll();
+  return 0;
+}