diff --git a/cxxmph/Makefile.am b/cxxmph/Makefile.am
--- a/cxxmph/Makefile.am
+++ b/cxxmph/Makefile.am
@@ -1,10 +1,10 @@
-check_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test bm_urls
-noinst_PROGRAMS = bm_urls
+check_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test
+noinst_PROGRAMS = bm_numbers bm_urls
 bin_PROGRAMS = cxxmph
 
 lib_LTLIBRARIES = libcxxmph.la
 include_HEADERS = cmph_hash_map.h mphtable.h MurmurHash2.h trigraph.h cxxmph_hash.h stringpiece.h
-libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mphtable.h mphtable.cc cxxmph_hash.h stringpiece.h
+libcxxmph_la_SOURCES = MurmurHash2.h trigraph.h trigraph.cc mphtable.h mphtable.cc cxxmph_hash.h stringpiece.h benchmark.h benchmark.cc
 libcxxmph_la_LDFLAGS = -version-info 0:0:0
 
 cmph_hash_map_test_LDADD = libcxxmph.la
@@ -16,6 +16,9 @@ mphtable_test_SOURCES = mphtable_test.cc
 trigraph_test_LDADD = libcxxmph.la
 trigraph_test_SOURCES = trigraph_test.cc
 
+bm_numbers_LDADD = libcxxmph.la
+bm_numbers_SOURCES = bm_numbers.cc
+
 bm_urls_LDADD = libcxxmph.la
 bm_urls_SOURCES = bm_urls.cc
 
diff --git a/cxxmph/MurmurHash2.h b/cxxmph/MurmurHash2.h
index d817c7b..0d318a3 100644
--- a/cxxmph/MurmurHash2.h
+++ b/cxxmph/MurmurHash2.h
@@ -17,6 +17,7 @@
 
 namespace cxxmph {
 
+inline // not measured, for making compilation easier only
 unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
 {
 	// 'm' and 'r' are mixing constants generated offline.
diff --git a/cxxmph/benchmark.cc b/cxxmph/benchmark.cc
new file mode 100644
--- /dev/null
+++ b/cxxmph/benchmark.cc
@@ -0,0 +1,129 @@
+#include "benchmark.h"
+
+#include <sys/resource.h>
+#include <sys/time.h>
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#ifdef HAVE_CXA_DEMANGLE
+#include <cxxabi.h>
+#endif
+
+using std::cerr;
+using std::endl;
+using std::string;
+using std::vector;
+
+namespace {
+
+/* Subtract the `struct timeval' values X and Y,
+   storing the result in RESULT.
+   Return 1 if the difference is negative, otherwise 0.
+   Neither input is modified: the carry is applied to a local copy of Y.  */
+int timeval_subtract (
+    struct timeval *result, const struct timeval *x, const struct timeval* y) {
+  struct timeval sub = *y;
+  /* Perform the carry for the later subtraction by updating sub. */
+  if (x->tv_usec < sub.tv_usec) {
+    int nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
+    sub.tv_usec -= 1000000 * nsec;
+    sub.tv_sec += nsec;
+  }
+  if (x->tv_usec - sub.tv_usec > 1000000) {
+    int nsec = (x->tv_usec - sub.tv_usec) / 1000000;
+    sub.tv_usec += 1000000 * nsec;
+    sub.tv_sec -= nsec;
+  }
+
+  /* Compute the time remaining to wait.
+     tv_usec is certainly positive. */
+  result->tv_sec = x->tv_sec - sub.tv_sec;
+  result->tv_usec = x->tv_usec - sub.tv_usec;
+
+  /* Return 1 if result is negative. */
+  return x->tv_sec < sub.tv_sec;
+}
+
+/* Returns the resource usage of the calling process; prints the errno
+   message and exits if getrusage() fails. */
+struct rusage getrusage_or_die() {
+  struct rusage rs;
+  int ret = getrusage(RUSAGE_SELF, &rs);
+  if (ret != 0) {
+    cerr << "rusage failed: " << strerror(errno) << endl;
+    exit(-1);
+  }
+  return rs;
+}
+
+/* Returns a readable form of a typeid() name, or NAME itself when
+   demangling is unavailable or fails. */
+#ifdef HAVE_CXA_DEMANGLE
+string demangle(const string& name) {
+  int status = 0;
+  /* Pass no buffer: __cxa_demangle requires malloc()ed storage because it
+     may realloc() it, so a stack array is not a valid output buffer. */
+  char* res = abi::__cxa_demangle(name.c_str(), NULL, NULL, &status);
+  if (status != 0 || res == NULL) {
+    free(res);
+    return name;
+  }
+  string demangled(res);
+  free(res);
+  return demangled;
+}
+#else
+string demangle(const string& name) { return name; }
+#endif
+
+/* Benchmarks registered so far; owned here until RunAll() deletes them. */
+vector<cxxmph::Benchmark*> g_benchmarks;
+
+}  // anonymous namespace
+
+namespace cxxmph {
+
+/* static */ void Benchmark::Register(Benchmark* bm) {
+  if (bm->name().empty()) {
+    string name = demangle(typeid(*bm).name());
+    bm->set_name(name);
+  }
+  g_benchmarks.push_back(bm);
+}
+
+/* static */ void Benchmark::RunAll() {
+  for (auto it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
+    (*it)->MeasureRun();
+    delete *it;
+  }
+  /* Drop the now-dangling pointers so a later RunAll() cannot touch them. */
+  g_benchmarks.clear();
+}
+
+void Benchmark::MeasureRun() {
+  struct rusage begin = getrusage_or_die();
+  Run(iters_);
+  struct rusage end = getrusage_or_die();
+
+  struct timeval utime;
+  timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime);
+  struct timeval stime;
+  timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime);
+
+  /* tv_sec/tv_usec are time_t/suseconds_t, which need not be long. */
+  printf("Benchmark: %s\n", name().c_str());
+  printf("User time used  : %ld.%06ld\n",
+         static_cast<long>(utime.tv_sec), static_cast<long>(utime.tv_usec));
+  printf("System time used: %ld.%06ld\n",
+         static_cast<long>(stime.tv_sec), static_cast<long>(stime.tv_usec));
+  printf("\n");
+}
+
+}  // namespace cxxmph
diff --git a/cxxmph/benchmark.h b/cxxmph/benchmark.h
new file mode 100644
--- /dev/null
+++ b/cxxmph/benchmark.h
@@ -0,0 +1,37 @@
+#ifndef __CXXMPH_BENCHMARK_H__
+#define __CXXMPH_BENCHMARK_H__
+
+#include <string>
+#include <typeinfo>
+
+namespace cxxmph {
+
+// Base class for benchmarks: subclasses implement Run(), Register() hands
+// an instance to the registry and RunAll() times and then deletes each one.
+class Benchmark {
+ public:
+  Benchmark(int iters = 1) : iters_(iters) { }
+  // Body of the benchmark; MeasureRun() calls it with the iters value
+  // given to the constructor.
+  virtual void Run(int iters) = 0;
+  virtual ~Benchmark() { }
+  const std::string& name() const { return name_; }
+  void set_name(const std::string& name) { name_ = name; }
+
+  // Takes ownership of bm; it is deleted by RunAll().
+  static void Register(Benchmark* bm);
+  static void RunAll();
+
+ protected:
+  int iters() const { return iters_; }
+
+ private:
+  int iters_;
+  std::string name_;
+  // Runs Run(iters_) and prints the user and system CPU time it consumed.
+  void MeasureRun();
+};
+
+}  // namespace cxxmph
+
+#endif
diff --git a/cxxmph/bm_numbers.cc b/cxxmph/bm_numbers.cc
new file mode 100644
--- /dev/null
+++ b/cxxmph/bm_numbers.cc
@@ -0,0 +1,58 @@
+#include <cstdlib>
+#include <set>
+#include <vector>
+
+#include "benchmark.h"
+#include "mphtable.h"
+
+using std::set;
+using std::vector;
+
+namespace cxxmph {
+
+// Times building a SimpleMPHTable over one million distinct random ints.
+class BM_NumbersCreate : public Benchmark {
+ public:
+  BM_NumbersCreate(int iters = 1) : Benchmark(iters) {
+    set<int> unique;
+    while (unique.size() < 1000 * 1000) {
+      int v = random();
+      // insert() reports whether v was new, so one lookup is enough.
+      if (unique.insert(v).second) random_unique_.push_back(v);
+    }
+  }
+ protected:
+  virtual void Run(int iters) {
+    SimpleMPHTable<int> table;
+    table.Reset(random_unique_.begin(), random_unique_.end());
+  }
+  std::vector<int> random_unique_;
+};
+
+// Times index() lookups of randomly chosen keys in a prebuilt table.
+class BM_NumbersFind : public BM_NumbersCreate {
+ public:
+  BM_NumbersFind(int iters) : BM_NumbersCreate(iters) {
+    table_.Reset(random_unique_.begin(), random_unique_.end());
+  }
+  virtual void Run(int iters) {
+    for (int i = 0; i < iters * 100; ++i) {
+      int pos = random() % random_unique_.size();
+      // Look up the stored key, not its position in the vector.
+      int h = table_.index(random_unique_[pos]);
+      (void)h;  // result unused; only the lookup is being timed
+    }
+  }
+ private:
+  SimpleMPHTable<int> table_;
+};
+
+}  // namespace cxxmph
+
+using namespace cxxmph;
+
+int main(int argc, char** argv) {
+  Benchmark::Register(new BM_NumbersCreate());
+  Benchmark::Register(new BM_NumbersFind(1000 * 1000));
+  Benchmark::RunAll();
+}
diff --git a/cxxmph/bm_urls.cc b/cxxmph/bm_urls.cc
--- a/cxxmph/bm_urls.cc
+++ b/cxxmph/bm_urls.cc
@@ -1,21 +1,86 @@
-#include <fstream>
-#include <vector>
 #include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <set>
 #include <string>
+#include <vector>
 
-#include "mphtable.h"
+#include "benchmark.h"
+#include "cmph_hash_map.h"
 
+using std::cerr;
+using std::endl;
 using std::ifstream;
+using std::set;
 using std::string;
 using std::vector;
-using cxxmph::SimpleMPHTable;
+
+namespace cxxmph {
+
+// Times filling a cmph_hash_map with the keys read from URLS100k.
+class BM_UrlsCreate : public Benchmark {
+ public:
+  BM_UrlsCreate(int iters = 1) : Benchmark(iters) {
+    ReadUrls();
+  }
+ protected:
+  virtual void Run(int iters) {
+    BuildTable();
+  }
+  // Maps every URL to its position in urls_, then packs the table.
+  void BuildTable() {
+    for (auto it = urls_.begin(); it != urls_.end(); ++it) {
+      table_[*it] = it - urls_.begin();
+    }
+    table_.pack();
+  }
+  // Loads one key per line from URLS100k into urls_; exits if no key could
+  // be read or if a line is repeated.
+  void ReadUrls() {
+    vector<string> urls;
+    std::ifstream f("URLS100k");
+    string buffer;
+    while(std::getline(f, buffer)) urls.push_back(buffer);
+    if (urls.empty()) {
+      // BM_UrlsFind::Run takes random() % urls_.size(), so an empty key
+      // set would divide by zero.
+      cerr << "Could not read any keys from URLS100k." << endl;
+      exit(-1);
+    }
+    set<string> unique(urls.begin(), urls.end());
+    if (unique.size() != urls.size()) {
+      cerr << "Input file has repeated keys." << endl;
+      exit(-1);
+    }
+    urls_.swap(urls);
+  }
+  vector<string> urls_;
+  cxxmph::cmph_hash_map<string, int> table_;
+};
+
+// Times lookups of randomly chosen keys in a prebuilt map.
+class BM_UrlsFind : public BM_UrlsCreate {
+ public:
+  // BM_UrlsCreate's constructor has already loaded urls_, so only the table
+  // needs building here.
+  BM_UrlsFind(int iters = 1) : BM_UrlsCreate(iters) { BuildTable(); }
+ protected:
+  virtual void Run(int iters) {
+    for (int i = 0; i < iters * 100; ++i) {
+      int pos = random() % urls_.size();
+      int h = table_[urls_[pos]];
+      assert(h == pos);
+    }
+  }
+};
+
+} // namespace cxxmph
+
+using namespace cxxmph;
 
 int main(int argc, char** argv) {
-  vector<string> urls;
-  std::ifstream f("URLS1k");
-  string buffer;
-  while(std::getline(f, buffer)) urls.push_back(buffer);
-
-  SimpleMPHTable<string> table;
-  table.Reset(urls.begin(), urls.end());
+  Benchmark::Register(new BM_UrlsCreate());
+  Benchmark::Register(new BM_UrlsFind(1000 * 1000));
+  Benchmark::RunAll();
 }
diff --git a/cxxmph/mphtable.h b/cxxmph/mphtable.h
index a899a89..340b3db 100644
--- a/cxxmph/mphtable.h
+++ b/cxxmph/mphtable.h
@@ -85,13 +85,13 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
   n_ = 3*r_;
   k_ = 1U << b_;
 
-  cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
+  // cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
 
   int iterations = 10;
   std::vector edges;
   std::vector queue;
   while (1) {
-    cerr << "Iterations missing: " << iterations << endl;
+    // cerr << "Iterations missing: " << iterations << endl;
     for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
     // for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i;
     if (Mapping(begin, end, &edges, &queue)) break;