Adding support for miss benchmarks. Need to fix myfind methods.

This commit is contained in:
Davi Reis 2012-03-06 18:25:05 -08:00
parent 3ba778f671
commit 7b6c163075
5 changed files with 86 additions and 7 deletions

View File

@ -14,7 +14,7 @@ mph_map_test_SOURCES = mph_map_test.cc
mph_index_test_LDADD = libcxxmph.la
mph_index_test_SOURCES = mph_index_test.cc
bm_index_LDADD = libcxxmph.la
bm_index_LDADD = libcxxmph.la -lcmph
bm_index_SOURCES = bm_common.cc bm_index.cc
trigraph_test_LDADD = libcxxmph.la

View File

@ -1,4 +1,6 @@
#include <cmath>
#include <fstream>
#include <limits>
#include <iostream>
#include <set>
@ -32,9 +34,15 @@ bool UrlsBenchmark::SetUp() {
bool SearchUrlsBenchmark::SetUp() {
if (!UrlsBenchmark::SetUp()) return false;
int32_t miss_ratio_int32 = std::numeric_limits<int32_t>::max() * miss_ratio_;
forced_miss_urls_.resize(nsearches_);
random_.resize(nsearches_);
for (int i = 0; i < nsearches_; ++i) {
random_[i] = urls_[random() % urls_.size()];
if (random() < miss_ratio_int32) {
forced_miss_urls_[i] = random_[i].as_string() + ".force_miss";
random_[i] = forced_miss_urls_[i];
}
}
return true;
}

View File

@ -32,10 +32,12 @@ class UrlsBenchmark : public Benchmark {
// Base class for benchmarks that perform lookups over a list of urls.
//
// Holds the number of searches to run, the fraction of searches that should
// miss, and the storage for the search keys prepared by SetUp().
class SearchUrlsBenchmark : public UrlsBenchmark {
 public:
  // urls_file is forwarded to UrlsBenchmark; nsearches is the number of
  // search keys to prepare. The miss ratio is fixed at 0.2.
  SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches)
      : UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(0.2) {}
 protected:
  virtual bool SetUp();
  // Number of search keys to prepare.
  const uint32_t nsearches_;
  // Fraction of search keys that get a ".force_miss" suffix appended.
  float miss_ratio_;
  // Storage for the suffixed (forced miss) keys.
  std::vector<std::string> forced_miss_urls_;
  // The search keys, in the order they are looked up.
  std::vector<StringPiece> random_;
};

View File

@ -1,3 +1,6 @@
#include <cmph.h>
#include <cstdio>
#include <set>
#include <string>
#include <unordered_map>
@ -56,6 +59,56 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
SimpleMPHIndex<StringPiece> index_;
};
class BM_CmphIndexSearch : public SearchUrlsBenchmark {
public:
BM_CmphIndexSearch(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches) { }
~BM_CmphIndexSearch() { if (index_) cmph_destroy(index_); }
virtual void Run() {
for (auto it = random_.begin(); it != random_.end(); ++it) {
auto idx = cmph_search(index_, it->data(), it->length());
// Collision check to be fair with STL
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
}
}
protected:
virtual bool SetUp() {
if (!SearchUrlsBenchmark::SetUp()) {
cerr << "Parent class setup failed." << endl;
return false;
}
FILE* f = fopen(urls_file_.c_str(), "r");
if (!f) {
cerr << "Faied to open " << urls_file_ << endl;
return false;
}
cmph_io_adapter_t* source = cmph_io_nlfile_adapter(f);
if (!source) {
cerr << "Faied to create io adapter for " << urls_file_ << endl;
return false;
}
cmph_config_t* config = cmph_config_new(source);
if (!config) {
cerr << "Failed to create config" << endl;
return false;
}
cmph_config_set_algo(config, CMPH_BDZ);
cmph_t* mphf = cmph_new(config);
if (!mphf) {
cerr << "Failed to create mphf." << endl;
return false;
}
cmph_config_destroy(config);
cmph_io_nlfile_adapter_destroy(source);
fclose(f);
index_ = mphf;
return true;
}
cmph_t* index_;
};
class BM_STLIndexSearch : public SearchUrlsBenchmark {
public:
BM_STLIndexSearch(const std::string& urls_file, int nsearches)
@ -80,10 +133,13 @@ class BM_STLIndexSearch : public SearchUrlsBenchmark {
};
// Registers the benchmarks to execute and runs all of them.
int main(int argc, char** argv) {
  // Input file and search count shared by the registered benchmarks.
  const char* const urls_file = "URLS100k";
  const int search_count = 100*1000*1000;
  /*
  Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
  Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
  Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 100*1000*1000));
  Benchmark::Register(new BM_STLIndexSearch("URLS100k", 100*1000*1000));
  */
  Benchmark::Register(new BM_CmphIndexSearch(urls_file, search_count));
  Benchmark::RunAll();
  return 0;
}

View File

@ -11,7 +11,9 @@
namespace cxxmph {
// Looks up k in mymap.
// Returns the mapped value, or the all-ones value (uint64_t(-1)) when k is
// not present. The iterator is checked before it is dereferenced, so a miss
// never touches end().
uint64_t myfind(const unordered_map<uint64_t, uint64_t>& mymap, const uint64_t& k) {
  auto it = mymap.find(k);
  if (it == mymap.end()) return static_cast<uint64_t>(-1);
  return it->second;
}
uint64_t myfind(const mph_map<uint64_t, uint64_t>& mymap, const uint64_t& k) {
@ -19,7 +21,9 @@
}
// Looks up k in mymap.
// Returns a reference to the mapped value, or to a ".force_miss" sentinel
// when k is not present. The sentinel is a function-local static so the
// returned reference stays valid after the call; returning the string
// literal directly would bind the reference to a temporary.
const StringPiece& myfind(const unordered_map<StringPiece, StringPiece, Murmur2StringPiece>& mymap, const StringPiece& k) {
  static const StringPiece miss_value(".force_miss");
  auto it = mymap.find(k);
  if (it == mymap.end()) return miss_value;
  return it->second;
}
StringPiece myfind(const mph_map<StringPiece, StringPiece>& mymap, const StringPiece& k) {
auto it = mymap.find(k);
@ -44,13 +48,22 @@ class BM_SearchUrls : public SearchUrlsBenchmark {
// Forwards the urls file name and the number of searches to
// SearchUrlsBenchmark.
BM_SearchUrls(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches) { }
// Looks up every key in random_ through myfind() and aborts the process if
// a key that was not a forced miss comes back with a different value.
// Progress and every lookup are traced to stderr.
virtual void Run() {
  fprintf(stderr, "Running benchmark\n");
  for (auto it = random_.begin(); it != random_.end(); ++it) {
    // Keys carrying this suffix are expected not to be found.
    const bool forced_miss = it->ends_with(".force_miss");
    if (forced_miss) {
      fprintf(stderr, "About to miss\n");
    } else {
      fprintf(stderr, "No miss\n");
    }
    fprintf(stderr, "it: *%s\n", it->as_string().c_str());
    auto v = myfind(mymap_, *it);
    fprintf(stderr, "v: %s, it: *%s\n", v.as_string().c_str(), it->as_string().c_str());
    // A mismatch is an error only for keys that were expected to be found.
    if (v != *it && !forced_miss) {
      // Printed through as_string(), like the traces above, so the output
      // does not depend on data() being NUL-terminated.
      fprintf(stderr, "Looked for %s got %s\n", it->as_string().c_str(), v.as_string().c_str());
      exit(-1);
    }
  }
  fprintf(stderr, "Done running benchmark\n");
}
protected:
virtual bool SetUp() {
@ -102,8 +115,8 @@ int main(int argc, char** argv) {
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
*/
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000* 1000));
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 10*1000* 1000));
// Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000));
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur2StringPiece>>("URLS100k", 10));
/*
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);