2012-03-07 04:25:05 +02:00
|
|
|
#include <cmph.h>
|
|
|
|
|
|
|
|
#include <cstdio>
|
2011-06-13 09:14:15 +03:00
|
|
|
#include <set>
|
|
|
|
#include <string>
|
2011-11-10 20:44:37 +02:00
|
|
|
#include <unordered_map>
|
2011-06-13 09:14:15 +03:00
|
|
|
|
|
|
|
#include "bm_common.h"
|
2011-06-14 08:24:40 +03:00
|
|
|
#include "stringpiece.h"
|
2011-06-13 09:14:15 +03:00
|
|
|
#include "mph_index.h"
|
|
|
|
|
|
|
|
using namespace cxxmph;
|
|
|
|
|
|
|
|
using std::string;
|
2011-11-10 20:44:37 +02:00
|
|
|
using std::unordered_map;
|
2011-06-13 09:14:15 +03:00
|
|
|
|
|
|
|
class BM_MPHIndexCreate : public UrlsBenchmark {
|
|
|
|
public:
|
|
|
|
BM_MPHIndexCreate(const std::string& urls_file)
|
|
|
|
: UrlsBenchmark(urls_file) { }
|
|
|
|
protected:
|
|
|
|
virtual void Run() {
|
|
|
|
SimpleMPHIndex<StringPiece> index;
|
2012-03-12 04:21:18 +02:00
|
|
|
index.Reset(urls_.begin(), urls_.end(), urls_.size());
|
2011-06-13 09:14:15 +03:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class BM_STLIndexCreate : public UrlsBenchmark {
|
|
|
|
public:
|
|
|
|
BM_STLIndexCreate(const std::string& urls_file)
|
|
|
|
: UrlsBenchmark(urls_file) { }
|
|
|
|
protected:
|
|
|
|
virtual void Run() {
|
2011-06-14 08:24:40 +03:00
|
|
|
unordered_map<StringPiece, uint32_t> index;
|
|
|
|
int idx = 0;
|
|
|
|
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
|
|
|
index.insert(make_pair(*it, idx++));
|
|
|
|
}
|
2011-06-13 09:14:15 +03:00
|
|
|
}
|
|
|
|
};
|
2011-11-10 20:44:37 +02:00
|
|
|
|
2011-06-13 09:14:15 +03:00
|
|
|
class BM_MPHIndexSearch : public SearchUrlsBenchmark {
|
|
|
|
public:
|
|
|
|
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
|
2012-03-07 08:00:17 +02:00
|
|
|
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
|
2011-06-13 09:14:15 +03:00
|
|
|
virtual void Run() {
|
2012-06-01 23:49:00 +03:00
|
|
|
uint64_t sum = 0;
|
2011-06-13 09:14:15 +03:00
|
|
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
2011-06-14 08:24:40 +03:00
|
|
|
auto idx = index_.index(*it);
|
|
|
|
// Collision check to be fair with STL
|
2012-04-15 06:03:00 +03:00
|
|
|
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
|
2012-06-01 23:49:00 +03:00
|
|
|
sum += idx;
|
2011-06-13 09:14:15 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
protected:
|
|
|
|
virtual bool SetUp () {
|
|
|
|
if (!SearchUrlsBenchmark::SetUp()) return false;
|
2012-03-12 04:21:18 +02:00
|
|
|
index_.Reset(urls_.begin(), urls_.end(), urls_.size());
|
2011-06-13 09:14:15 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
SimpleMPHIndex<StringPiece> index_;
|
|
|
|
};
|
|
|
|
|
2012-03-07 04:25:05 +02:00
|
|
|
class BM_CmphIndexSearch : public SearchUrlsBenchmark {
|
|
|
|
public:
|
|
|
|
BM_CmphIndexSearch(const std::string& urls_file, int nsearches)
|
2012-03-07 08:00:17 +02:00
|
|
|
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
|
2012-03-07 04:25:05 +02:00
|
|
|
~BM_CmphIndexSearch() { if (index_) cmph_destroy(index_); }
|
|
|
|
virtual void Run() {
|
2012-06-01 23:49:00 +03:00
|
|
|
uint64_t sum = 0;
|
2012-03-07 04:25:05 +02:00
|
|
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
|
|
|
auto idx = cmph_search(index_, it->data(), it->length());
|
|
|
|
// Collision check to be fair with STL
|
|
|
|
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
|
2012-06-01 23:49:00 +03:00
|
|
|
sum += idx;
|
2012-03-07 04:25:05 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
protected:
|
|
|
|
virtual bool SetUp() {
|
|
|
|
if (!SearchUrlsBenchmark::SetUp()) {
|
|
|
|
cerr << "Parent class setup failed." << endl;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
FILE* f = fopen(urls_file_.c_str(), "r");
|
|
|
|
if (!f) {
|
|
|
|
cerr << "Faied to open " << urls_file_ << endl;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
cmph_io_adapter_t* source = cmph_io_nlfile_adapter(f);
|
|
|
|
if (!source) {
|
|
|
|
cerr << "Faied to create io adapter for " << urls_file_ << endl;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
cmph_config_t* config = cmph_config_new(source);
|
|
|
|
if (!config) {
|
|
|
|
cerr << "Failed to create config" << endl;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
cmph_config_set_algo(config, CMPH_BDZ);
|
|
|
|
cmph_t* mphf = cmph_new(config);
|
|
|
|
if (!mphf) {
|
|
|
|
cerr << "Failed to create mphf." << endl;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
cmph_config_destroy(config);
|
|
|
|
cmph_io_nlfile_adapter_destroy(source);
|
|
|
|
fclose(f);
|
|
|
|
index_ = mphf;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
cmph_t* index_;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2011-06-13 09:14:15 +03:00
|
|
|
class BM_STLIndexSearch : public SearchUrlsBenchmark {
|
|
|
|
public:
|
|
|
|
BM_STLIndexSearch(const std::string& urls_file, int nsearches)
|
2012-03-07 08:00:17 +02:00
|
|
|
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
|
2011-06-13 09:14:15 +03:00
|
|
|
virtual void Run() {
|
2012-06-01 23:49:00 +03:00
|
|
|
uint64_t sum = 0;
|
2011-06-13 09:14:15 +03:00
|
|
|
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
2011-06-14 08:24:40 +03:00
|
|
|
auto idx = index_.find(*it);
|
2012-06-01 23:49:00 +03:00
|
|
|
sum += idx->second;
|
2011-06-13 09:14:15 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
protected:
|
|
|
|
virtual bool SetUp () {
|
|
|
|
if (!SearchUrlsBenchmark::SetUp()) return false;
|
2011-06-14 08:24:40 +03:00
|
|
|
unordered_map<StringPiece, uint32_t> index;
|
|
|
|
int idx = 0;
|
|
|
|
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
|
|
|
index.insert(make_pair(*it, idx++));
|
|
|
|
}
|
|
|
|
index.swap(index_);
|
2011-06-13 09:14:15 +03:00
|
|
|
return true;
|
|
|
|
}
|
2012-03-07 08:48:20 +02:00
|
|
|
unordered_map<StringPiece, uint32_t> index_;
|
2011-06-13 09:14:15 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
|
|
|
|
Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
|
2012-03-07 08:48:20 +02:00
|
|
|
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 10*1000*1000));
|
|
|
|
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 10*1000*1000));
|
|
|
|
Benchmark::Register(new BM_CmphIndexSearch("URLS100k", 10*1000*1000));
|
2011-06-13 09:14:15 +03:00
|
|
|
Benchmark::RunAll();
|
|
|
|
return 0;
|
|
|
|
}
|