Added a benchmark to the C++ code.

This commit is contained in:
Davi de Castro Reis 2011-02-18 14:15:24 -08:00
parent 8355e2e1b8
commit 05eaf15d53
7 changed files with 256 additions and 16 deletions

View File

@ -1,10 +1,10 @@
check_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test bm_urls
noinst_PROGRAMS = bm_urls
check_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test
noinst_PROGRAMS = bm_numbers bm_urls
bin_PROGRAMS = cxxmph
lib_LTLIBRARIES = libcxxmph.la
include_HEADERS = cmph_hash_map.h mphtable.h MurmurHash2.h trigraph.h cxxmph_hash.h stringpiece.h
libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mphtable.h mphtable.cc cxxmph_hash.h stringpiece.h
libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mphtable.h mphtable.cc cxxmph_hash.h stringpiece.h benchmark.h benchmark.cc
libcxxmph_la_LDFLAGS = -version-info 0:0:0
cmph_hash_map_test_LDADD = libcxxmph.la
@ -16,6 +16,9 @@ mphtable_test_SOURCES = mphtable_test.cc
trigraph_test_LDADD = libcxxmph.la
trigraph_test_SOURCES = trigraph_test.cc
bm_numbers_LDADD = libcxxmph.la
bm_numbers_SOURCES = bm_numbers.cc
bm_urls_LDADD = libcxxmph.la
bm_urls_SOURCES = bm_urls.cc

View File

@ -17,6 +17,7 @@
namespace cxxmph {
inline // not measured, for making compilation easier only
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
// 'm' and 'r' are mixing constants generated offline.

104
cxxmph/benchmark.cc Normal file
View File

@ -0,0 +1,104 @@
#include "benchmark.h"

#include <sys/resource.h>
#ifdef HAVE_CXA_DEMANGLE
#include <cxxabi.h>
#endif

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using std::cerr;
using std::endl;
using std::string;
using std::vector;
namespace {
/* Subtract the `struct timeval' values X and Y,
   storing the result in RESULT.
   Return 1 if the difference is negative, otherwise 0.

   X and Y are left untouched: the carry is performed on a private copy of
   Y, so callers can keep using both operands after the call.  RESULT may
   alias X or Y, because every input is read before RESULT is written. */
int timeval_subtract(
    struct timeval *result, const struct timeval *x, const struct timeval *y) {
  struct timeval sub = *y;
  /* Perform the carry for the later subtraction on the local copy of y. */
  if (x->tv_usec < sub.tv_usec) {
    const long nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
    sub.tv_usec -= 1000000 * nsec;
    sub.tv_sec += nsec;
  }
  if (x->tv_usec - sub.tv_usec > 1000000) {
    const long nsec = (x->tv_usec - sub.tv_usec) / 1000000;
    sub.tv_usec += 1000000 * nsec;
    sub.tv_sec -= nsec;
  }
  /* Compute the difference; after the carry, the tv_usec part is
     non-negative. */
  struct timeval diff;
  diff.tv_sec = x->tv_sec - sub.tv_sec;
  diff.tv_usec = x->tv_usec - sub.tv_usec;
  /* Decide the sign before touching *result, in case it aliases x. */
  const int negative = x->tv_sec < sub.tv_sec;
  *result = diff;
  return negative;
}
// Fetches the RUSAGE_SELF resource-usage record. If getrusage() reports an
// error, prints the errno message to stderr and terminates with exit(-1).
struct rusage getrusage_or_die() {
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage) == 0) {
    return usage;
  }
  std::cerr << "rusage failed: " << strerror(errno) << std::endl;
  exit(-1);
}
#ifdef HAVE_CXA_DEMANGLE
// Converts a mangled C++ name (e.g. from typeid(...).name()) into its
// human-readable form. Returns `name` unchanged if demangling fails.
std::string demangle(const std::string& name) {
  int status = 0;
  // With a null output buffer __cxa_demangle malloc()s the result itself.
  // A caller-supplied buffer must also come from malloc() because the
  // function may realloc() it, so a stack array must never be passed in.
  char* demangled =
      abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status);
  if (status != 0 || demangled == nullptr) {
    std::free(demangled);  // free(nullptr) is a no-op
    return name;
  }
  std::string readable(demangled);
  std::free(demangled);
  return readable;
}
#else
// No demangler available: report the name exactly as given.
std::string demangle(const std::string& name) { return name; }
#endif
// Benchmarks added through Benchmark::Register(), in registration order.
// Benchmark::RunAll() runs each entry and then deletes it.
static vector<cxxmph::Benchmark*> g_benchmarks;
} // anonymous namespace
namespace cxxmph {
// Appends `bm` to the list of benchmarks executed by RunAll(). A benchmark
// whose name is still empty is first named after the demangled name of its
// dynamic type.
/* static */ void Benchmark::Register(Benchmark* bm) {
  const bool unnamed = bm->name().empty();
  if (unnamed) {
    bm->set_name(demangle(typeid(*bm).name()));
  }
  g_benchmarks.push_back(bm);
}
/* static */ void Benchmark::RunAll() {
for (auto it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
(*it)->MeasureRun();
delete *it;
}
}
// Calls Run(iters_) once and prints the user and system CPU time that
// elapsed between the getrusage snapshots taken just before and after it.
void Benchmark::MeasureRun() {
  struct rusage begin = getrusage_or_die();
  Run(iters_);
  struct rusage end = getrusage_or_die();
  struct timeval utime;
  timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime);
  struct timeval stime;
  timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime);
  printf("Benchmark: %s\n", name().c_str());
  // tv_sec and tv_usec are time_t / suseconds_t, whose widths differ across
  // platforms; cast explicitly so the arguments always match "%ld".
  printf("User time used  : %ld.%06ld\n",
         static_cast<long>(utime.tv_sec), static_cast<long>(utime.tv_usec));
  printf("System time used: %ld.%06ld\n",
         static_cast<long>(stime.tv_sec), static_cast<long>(stime.tv_usec));
  printf("\n");
}
} // namespace cxxmph

31
cxxmph/benchmark.h Normal file
View File

@ -0,0 +1,31 @@
#ifndef __CXXMPH_BENCHMARK_H__
#define __CXXMPH_BENCHMARK_H__
#include <string>
#include <typeinfo>
namespace cxxmph {
// Abstract base class for a benchmark. Subclasses implement Run(); instances
// are handed to Register() and later executed by RunAll().
class Benchmark {
 public:
  // `iters` is stored and later passed to Run() when the benchmark is
  // measured.
  Benchmark(int iters = 1) : iters_(iters) { }
  // The work to be measured. Receives the iteration count given at
  // construction.
  virtual void Run(int iters) = 0;
  virtual ~Benchmark() { }
  // Name printed with the results. Register() fills it in from the dynamic
  // type when it is left empty.
  const std::string& name() const { return name_; }
  void set_name(const std::string& name) { name_ = name; }
  // Adds `bm` to the list run by RunAll(). RunAll() deletes every registered
  // benchmark after running it, so `bm` must be allocated with `new` and
  // must not be used by the caller once RunAll() has been called.
  static void Register(Benchmark* bm);
  // Runs, reports and deletes all registered benchmarks.
  static void RunAll();
 protected:
  int iters() const { return iters_; }
 private:
  int iters_;
  std::string name_;
  // Runs the benchmark once and prints the CPU time it consumed.
  void MeasureRun();
};
} // namespace cxxmph
#endif

52
cxxmph/bm_numbers.cc Normal file
View File

@ -0,0 +1,52 @@
#include <set>
#include <vector>
#include "benchmark.h"
#include "mphtable.h"
using std::set;
using std::vector;
namespace cxxmph {
class BM_NumbersCreate : public Benchmark {
public:
BM_NumbersCreate(int iters = 1) : Benchmark(iters) {
set<int> unique;
while (unique.size() < 1000 * 1000) {
int v = random();
if (unique.find(v) == unique.end()) {
unique.insert(v);
random_unique_.push_back(v);
}
}
}
protected:
virtual void Run(int iters) {
SimpleMPHTable<int> table;
table.Reset(random_unique_.begin(), random_unique_.end());
}
std::vector<int> random_unique_;
};
// Benchmark that measures lookups in a SimpleMPHTable built once, in the
// constructor, from the keys generated by BM_NumbersCreate.
class BM_NumbersFind : public BM_NumbersCreate {
 public:
  BM_NumbersFind(int iters) : BM_NumbersCreate(iters) {
    table_.Reset(random_unique_.begin(), random_unique_.end());
  }
  // Performs iters * 100 lookups of randomly chosen keys.
  virtual void Run(int iters) {
    for (int i = 0; i < iters * 100; ++i) {
      const int pos = random() % random_unique_.size();
      // Query the key stored at `pos`, not `pos` itself, so that the lookup
      // is for a value that is actually in the table (as BM_UrlsFind does).
      // NOTE(review): assumes SimpleMPHTable<int>::index() takes a key —
      // confirm against mphtable.h.
      const int h = table_.index(random_unique_[pos]);
      (void)h;  // result intentionally unused
    }
  }
 private:
  SimpleMPHTable<int> table_;
};
} // namespace cxxmph
using namespace cxxmph;
int main(int argc, char** argv) {
Benchmark::Register(new BM_NumbersCreate());
Benchmark::Register(new BM_NumbersFind(1000 * 1000));
Benchmark::RunAll();
}

View File

@ -1,21 +1,70 @@
#include <string>
#include <iostream>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <vector>
#include <unordered_map>
#include "mphtable.h"
#include "benchmark.h"
#include "cmph_hash_map.h"
using std::ifstream;
using std::set;
using std::string;
using std::vector;
using cxxmph::SimpleMPHTable;
int main(int argc, char** argv) {
namespace cxxmph {
// Benchmark that fills and packs a cmph_hash_map keyed by URL.
// The URL list is loaded by the constructor; Run() only builds the table.
class BM_UrlsCreate : public Benchmark {
public:
BM_UrlsCreate(int iters = 1) : Benchmark(iters) {
ReadUrls();
}
protected:
// Builds the table. The `iters` argument is not used here.
virtual void Run(int iters) {
BuildTable();
}
// Maps every URL to its position in urls_, then calls table_.pack().
void BuildTable() {
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
table_[*it] = it - urls_.begin();
}
table_.pack();
}
// Reads one key per line from the input file into urls_. Prints a message
// and exits with status -1 if any line occurs more than once.
void ReadUrls() {
vector<string> urls;
// NOTE(review): the next two lines both declare `f`, and the two
// SimpleMPHTable lines further down build a table that is never used.
// They look like removed lines left over from a diff — confirm which
// file name is intended and whether the SimpleMPHTable lines should go.
std::ifstream f("URLS1k");
std::ifstream f("URLS100k");
string buffer;
while(std::getline(f, buffer)) urls.push_back(buffer);
SimpleMPHTable<string> table;
table.Reset(urls.begin(), urls.end());
// A set drops duplicates, so a size mismatch means repeated keys.
set<string> unique(urls.begin(), urls.end());
if (unique.size() != urls.size()) {
cerr << "Input file has repeated keys." << endl;
exit(-1);
}
// Publish the validated list without copying it.
urls_.swap(urls);
}
// Keys, in file order.
vector<string> urls_;
// URL -> position in urls_ (filled by BuildTable()).
cxxmph::cmph_hash_map<string, int> table_;
};
// Benchmark that measures random lookups in a URL table built once, in the
// constructor.
class BM_UrlsFind : public BM_UrlsCreate {
 public:
  // The BM_UrlsCreate constructor has already loaded urls_, so only the
  // table has to be built here; reading the file a second time is redundant.
  BM_UrlsFind(int iters = 1) : BM_UrlsCreate(iters) { BuildTable(); }
 protected:
  // Performs iters * 100 lookups of randomly chosen URLs and checks that
  // each one maps back to its own position.
  virtual void Run(int iters) {
    // Nothing to look up (e.g. the input file was missing); also avoids a
    // modulo by zero below.
    if (urls_.empty()) return;
    for (int i = 0; i < iters * 100; ++i) {
      const int pos = random() % urls_.size();
      const int h = table_[urls_[pos]];
      assert(h == pos);
      (void)h;  // keeps h "used" when assert() is compiled out
    }
  }
};
} // namespace cxxmph
using namespace cxxmph;
int main(int argc, char** argv) {
Benchmark::Register(new BM_UrlsCreate());
Benchmark::Register(new BM_UrlsFind(1000 * 1000));
Benchmark::RunAll();
}

View File

@ -85,13 +85,13 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
n_ = 3*r_;
k_ = 1U << b_;
cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
// cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
int iterations = 10;
std::vector<TriGraph::Edge> edges;
std::vector<uint32_t> queue;
while (1) {
cerr << "Iterations missing: " << iterations << endl;
// cerr << "Iterations missing: " << iterations << endl;
for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
// for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i;
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;