diff --git a/cxxmph/Makefile.am b/cxxmph/Makefile.am index 55df057..2e57a18 100644 --- a/cxxmph/Makefile.am +++ b/cxxmph/Makefile.am @@ -14,7 +14,7 @@ mph_map_test_SOURCES = mph_map_test.cc mph_index_test_LDADD = libcxxmph.la mph_index_test_SOURCES = mph_index_test.cc -bm_index_LDADD = libcxxmph.la +bm_index_LDADD = libcxxmph.la -lcmph bm_index_SOURCES = bm_common.cc bm_index.cc trigraph_test_LDADD = libcxxmph.la diff --git a/cxxmph/bm_common.cc b/cxxmph/bm_common.cc index c52b2e5..7e94dcf 100644 --- a/cxxmph/bm_common.cc +++ b/cxxmph/bm_common.cc @@ -1,4 +1,6 @@ +#include #include +#include #include #include @@ -32,9 +34,15 @@ bool UrlsBenchmark::SetUp() { bool SearchUrlsBenchmark::SetUp() { if (!UrlsBenchmark::SetUp()) return false; + int32_t miss_ratio_int32 = std::numeric_limits::max() * miss_ratio_; + forced_miss_urls_.resize(nsearches_); random_.resize(nsearches_); for (int i = 0; i < nsearches_; ++i) { random_[i] = urls_[random() % urls_.size()]; + if (random() < miss_ratio_int32) { + forced_miss_urls_[i] = random_[i].as_string() + ".force_miss"; + random_[i] = forced_miss_urls_[i]; + } } return true; } diff --git a/cxxmph/bm_common.h b/cxxmph/bm_common.h index 4fea687..c67b5cd 100644 --- a/cxxmph/bm_common.h +++ b/cxxmph/bm_common.h @@ -32,10 +32,12 @@ class UrlsBenchmark : public Benchmark { class SearchUrlsBenchmark : public UrlsBenchmark { public: SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches) - : UrlsBenchmark(urls_file), nsearches_(nsearches) {} + : UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(0.2) {} protected: virtual bool SetUp(); const uint32_t nsearches_; + float miss_ratio_; + std::vector forced_miss_urls_; std::vector random_; }; diff --git a/cxxmph/bm_index.cc b/cxxmph/bm_index.cc index 84bf7d2..a7c0a14 100644 --- a/cxxmph/bm_index.cc +++ b/cxxmph/bm_index.cc @@ -1,3 +1,6 @@ +#include + +#include #include #include #include @@ -56,6 +59,56 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark { SimpleMPHIndex index_; }; +class BM_CmphIndexSearch : public SearchUrlsBenchmark { + public: + BM_CmphIndexSearch(const std::string& urls_file, int nsearches) + : SearchUrlsBenchmark(urls_file, nsearches) { } + ~BM_CmphIndexSearch() { if (index_) cmph_destroy(index_); } + virtual void Run() { + for (auto it = random_.begin(); it != random_.end(); ++it) { + auto idx = cmph_search(index_, it->data(), it->length()); + // Collision check to be fair with STL + if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1; + } + } + protected: + virtual bool SetUp() { + if (!SearchUrlsBenchmark::SetUp()) { + cerr << "Parent class setup failed." << endl; + return false; + } + FILE* f = fopen(urls_file_.c_str(), "r"); + if (!f) { + cerr << "Faied to open " << urls_file_ << endl; + return false; + } + cmph_io_adapter_t* source = cmph_io_nlfile_adapter(f); + if (!source) { + cerr << "Faied to create io adapter for " << urls_file_ << endl; + return false; + } + cmph_config_t* config = cmph_config_new(source); + if (!config) { + cerr << "Failed to create config" << endl; + return false; + } + cmph_config_set_algo(config, CMPH_BDZ); + cmph_t* mphf = cmph_new(config); + if (!mphf) { + cerr << "Failed to create mphf." << endl; + return false; + } + + cmph_config_destroy(config); + cmph_io_nlfile_adapter_destroy(source); + fclose(f); + index_ = mphf; + return true; + } + cmph_t* index_; +}; + + class BM_STLIndexSearch : public SearchUrlsBenchmark { public: BM_STLIndexSearch(const std::string& urls_file, int nsearches) @@ -80,10 +133,13 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark { }; int main(int argc, char** argv) { +/* Benchmark::Register(new BM_MPHIndexCreate("URLS100k")); Benchmark::Register(new BM_STLIndexCreate("URLS100k")); Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000)); Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000)); +*/ + Benchmark::Register(new BM_CmphIndexSearch("URLS100k", 100*1000*1000)); Benchmark::RunAll(); return 0; } diff --git a/cxxmph/bm_map.cc b/cxxmph/bm_map.cc index 607edc6..8d2aef1 100644 --- a/cxxmph/bm_map.cc +++ b/cxxmph/bm_map.cc @@ -11,7 +11,9 @@ namespace cxxmph { uint64_t myfind(const unordered_map& mymap, const uint64_t& k) { - return mymap.find(k)->second; + auto it = mymap.find(k); + if (it == mymap.end()) return -1; + return it->second; } uint64_t myfind(const mph_map& mymap, const uint64_t& k) { @@ -19,7 +21,9 @@ } const StringPiece& myfind(const unordered_map& mymap, const StringPiece& k) { - return mymap.find(k)->second; + auto it = mymap.find(k); + if (it == mymap.end()) return ".force_miss"; + return it->second; } StringPiece myfind(const mph_map& mymap, const StringPiece& k) { auto it = mymap.find(k); @@ -44,13 +48,22 @@ class BM_SearchUrls : public SearchUrlsBenchmark { BM_SearchUrls(const std::string& urls_file, int nsearches) : SearchUrlsBenchmark(urls_file, nsearches) { } virtual void Run() { + fprintf(stderr, "Running benchmark\n"); for (auto it = random_.begin(); it != random_.end(); ++it) { + if (it->ends_with(".force_miss")) { + fprintf(stderr, "About to miss\n"); + } else { + fprintf(stderr, "No miss\n"); + } + fprintf(stderr, "it: *%s\n", it->as_string().c_str()); auto v = myfind(mymap_, *it); - if (v != *it) { + fprintf(stderr, "v: %s, it: *%s\n", v.as_string().c_str(), it->as_string().c_str()); + if (v != *it && !it->ends_with(".force_miss")) { fprintf(stderr, "Looked for %s got %s\n", it->data(), v.data()); exit(-1); } } + fprintf(stderr, "Done running benchmark\n"); } protected: virtual bool SetUp() { @@ -102,8 +115,8 @@ int main(int argc, char** argv) { Benchmark::Register(new BM_CreateUrls>("URLS100k")); Benchmark::Register(new BM_CreateUrls>("URLS100k")); */ - Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000* 1000)); - Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000* 1000)); + // Benchmark::Register(new BM_SearchUrls>("URLS100k", 10*1000 * 1000)); + Benchmark::Register(new BM_SearchUrls>("URLS100k", 10)); /* Benchmark::Register(new BM_SearchUint64>); Benchmark::Register(new BM_SearchUint64>);