diff --git a/cxxmph/Makefile.am b/cxxmph/Makefile.am index 801d2d0..c02e1c9 100644 --- a/cxxmph/Makefile.am +++ b/cxxmph/Makefile.am @@ -1,6 +1,6 @@ AM_CXXFLAGS='-std=c++0x' TESTS = $(check_PROGRAMS) -check_PROGRAMS = mph_map_test mph_index_test trigraph_test +check_PROGRAMS = mph_map_test mph_index_test # trigraph_test noinst_PROGRAMS = bm_index bm_map bin_PROGRAMS = cxxmph lib_LTLIBRARIES = libcxxmph.la @@ -15,8 +15,8 @@ mph_map_test_SOURCES = mph_map_test.cc mph_index_test_LDADD = libcxxmph.la mph_index_test_SOURCES = mph_index_test.cc -trigraph_test_LDADD = libcxxmph.la -trigraph_test_SOURCES = trigraph_test.cc +# trigraph_test_LDADD = libcxxmph.la +# trigraph_test_SOURCES = trigraph_test.cc bm_index_LDADD = libcxxmph.la bm_index_SOURCES = bm_common.cc bm_index.cc diff --git a/cxxmph/benchmark.cc b/cxxmph/benchmark.cc index 644bdc9..70175e1 100644 --- a/cxxmph/benchmark.cc +++ b/cxxmph/benchmark.cc @@ -3,15 +3,22 @@ #include #include #include +#include #include #include +#include #include +#include #include using std::cerr; +using std::cout; using std::endl; +using std::setfill; +using std::setw; using std::string; +using std::ostringstream; using std::vector; namespace { @@ -42,6 +49,14 @@ int timeval_subtract ( return x->tv_sec < y->tv_sec; } +// C++ iostream is terrible for formatting. +string timeval_to_string(timeval tv) { + ostringstream out; + out << setfill(' ') << setw(3) << tv.tv_sec << '.'; + out << setfill('0') << setw(6) << tv.tv_usec; + return out.str(); +} + struct rusage getrusage_or_die() { struct rusage rs; int ret = getrusage(RUSAGE_SELF, &rs); @@ -92,11 +107,14 @@ namespace cxxmph { /* static */ void Benchmark::RunAll() { for (int i = 0; i < g_benchmarks.size(); ++i) { - Benchmark* bm = g_benchmarks[i]; - bm->SetUp(); + std::auto_ptr bm(g_benchmarks[i]); + if (!bm->SetUp()) { + cerr << "Set up phase for benchmark " + << bm->name() << " failed." 
<< endl; + continue; + } bm->MeasureRun(); bm->TearDown(); - delete bm; } } @@ -114,11 +132,11 @@ void Benchmark::MeasureRun() { struct timeval wtime; timeval_subtract(&wtime, &walltime_end, &walltime_begin); - printf("Benchmark: %s\n", name().c_str()); - printf("CPU User time : %ld.%06ld\n", utime.tv_sec, utime.tv_usec); - printf("CPU System time: %ld.%06ld\n", stime.tv_sec, stime.tv_usec); - printf("Wall clock time: %ld.%06ld\n", wtime.tv_sec, wtime.tv_usec); - printf("\n"); + cout << "Benchmark: " << name_ << endl; + cout << "CPU User time : " << timeval_to_string(utime) << endl; + cout << "CPU System time: " << timeval_to_string(stime) << endl; + cout << "Wall clock time: " << timeval_to_string(wtime) << endl; + cout << endl; } } // namespace cxxmph diff --git a/cxxmph/bm_map.cc b/cxxmph/bm_map.cc index 5e79fbc..423e329 100644 --- a/cxxmph/bm_map.cc +++ b/cxxmph/bm_map.cc @@ -29,11 +29,12 @@ class BM_MapSearch : public SearchUrlsBenchmark { : SearchUrlsBenchmark(urls_file, nsearches) { } virtual void Run() { for (auto it = random_.begin(); it != random_.end(); ++it) { - auto value = mymap_[it->ToString()]; + mymap_.find(*it); } } protected: virtual bool SetUp() { + if (!SearchUrlsBenchmark::SetUp()) return false; for (auto it = urls_.begin(); it != urls_.end(); ++it) { mymap_[*it] = *it; } @@ -48,9 +49,9 @@ class BM_MapSearch : public SearchUrlsBenchmark { using namespace cxxmph; int main(int argc, char** argv) { - Benchmark::Register(new BM_MapCreate>("URLS100k")); - Benchmark::Register(new BM_MapCreate>("URLS100k")); - Benchmark::Register(new BM_MapSearch>("URLS100k", 1000 * 1000)); - Benchmark::Register(new BM_MapSearch>("URLS100k", 1000 * 1000)); + Benchmark::Register(new BM_MapCreate>("URLS100k")); + Benchmark::Register(new BM_MapCreate>("URLS100k")); + Benchmark::Register(new BM_MapSearch>("URLS100k", 1000* 1000)); + Benchmark::Register(new BM_MapSearch>("URLS100k", 1000* 1000)); Benchmark::RunAll(); } diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h 
index 5b0f6c6..d03dd92 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -93,6 +93,10 @@ class MPHIndex { // Template method needs to go in the header file. template bool MPHIndex::Reset(ForwardIterator begin, ForwardIterator end) { + if (end == begin) { + clear(); + return true; + } m_ = end - begin; r_ = static_cast(ceil((c_*m_)/3)); if ((r_ % 2) == 0) r_ += 1; diff --git a/cxxmph/seeded_hash.h b/cxxmph/seeded_hash.h index d732d62..99a3ca6 100644 --- a/cxxmph/seeded_hash.h +++ b/cxxmph/seeded_hash.h @@ -60,6 +60,8 @@ template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; +template <> struct seeded_hash > +{ typedef seeded_hash_function hash_function; }; template <> struct seeded_hash > { typedef seeded_hash_function hash_function; }; diff --git a/cxxmph/stringpiece.h b/cxxmph/stringpiece.h index fdd8f75..ee6d125 100644 --- a/cxxmph/stringpiece.h +++ b/cxxmph/stringpiece.h @@ -145,32 +145,34 @@ class StringPiece { StringPiece substr(size_type pos, size_type n = npos) const; }; -} // namespace cxxmph +inline bool operator==(const StringPiece& x, const StringPiece& y) { + return x.length() == y.length() && memcmp(x.data(), y.data(), x.length()) == 0; +} -bool operator==(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y); - -inline bool operator!=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { +inline bool operator!=(const StringPiece& x, const StringPiece& y) { return !(x == y); } -inline bool operator<(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { +inline bool operator<(const StringPiece& x, const StringPiece& y) { const int r = memcmp(x.data(), y.data(), std::min(x.size(), y.size())); return ((r < 0) || ((r == 0) && (x.size() < y.size()))); } -inline bool operator>(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { +inline bool operator>(const StringPiece& x, const StringPiece& y) { return y < x; 
} -inline bool operator<=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { +inline bool operator<=(const StringPiece& x, const StringPiece& y) { return !(x > y); } -inline bool operator>=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { +inline bool operator>=(const StringPiece& x, const StringPiece& y) { return !(x < y); } +} // namespace cxxmph + // allow StringPiece to be logged extern std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece); diff --git a/src/bm_numbers.c b/src/bm_numbers.c index 7c6abb5..cd3aa74 100644 --- a/src/bm_numbers.c +++ b/src/bm_numbers.c @@ -1,10 +1,6 @@ #include #include -#include -using __gnu_cxx::hash_set; -static const char cxx_name = "__gnu_cxx::hash_set"; - #include "bitbool.h" #include "cmph.h" #include "cmph_benchmark.h" @@ -71,12 +67,12 @@ void bm_create(CMPH_ALGO algo, int iters) { void bm_search(CMPH_ALGO algo, int iters) { int i = 0; - char mphf_name[128]; + char *mphf_name; cmph_t* mphf = NULL; - - snprintf(mphf_name, 128, "%s:%u", cxx_name, iters); + mphf_name = create_lsmap_key(algo, iters); mphf = (cmph_t*)lsmap_search(g_created_mphf, mphf_name); + free(mphf_name); cmph_uint32* count = (cmph_uint32*)malloc(sizeof(cmph_uint32)*iters); cmph_uint32* hash_count = (cmph_uint32*)malloc(sizeof(cmph_uint32)*iters); @@ -106,49 +102,6 @@ DECLARE_ALGO(CMPH_BRZ); DECLARE_ALGO(CMPH_FCH); DECLARE_ALGO(CMPH_BDZ); -void bm_create_ext_hash_set(int iters) { - cmph_uint32 i = 0; - - if (iters > g_numbers_len) { - fprintf(stderr, "No input with proper size."); - exit(-1); - } - - hash_set* ext_hash_set = new hash_set; - for (i = 0; i < iters; ++i) { - ext_hash_set->insert(g_numbers[i]); - } - lsmap_append(g_created_mphf, cxx_name, ext_hash_set); -} - -void bm_search_ext_hash_set(int iters) { - cmph_uint32 i = 0; - - if (iters > g_numbers_len) { - fprintf(stderr, "No input with proper size."); - exit(-1); - } - - snprintf(mphf_name, 128, "%s:%u", hash_count, iters); - mphf =
(__gnu_cxx::hash_set*)lsmap_search(g_created_mphf, mphf_name); - - cmph_uint32* count = (cmph_uint32*)malloc(sizeof(cmph_uint32)*iters); - cmph_uint32* hash_count = (cmph_uint32*)malloc(sizeof(cmph_uint32)*iters); - - for (i = 0; i < iters * 100; ++i) { - cmph_uint32 pos = random() % iters; - const char* buf = (const char*)(g_numbers + pos); - cmph_uint32 h = cmph_search(mphf, buf, sizeof(cmph_uint32)); - ++count[pos]; - ++hash_count[h]; - } - - // Verify correctness later. - lsmap_append(g_expected_probes, create_lsmap_key(algo, iters), count); - lsmap_append(g_mphf_probes, create_lsmap_key(algo, iters), hash_count); -} -} - int main(int argc, char** argv) { g_numbers_len = 1000 * 1000; g_numbers = random_numbers_vector_new(g_numbers_len); @@ -162,8 +115,8 @@ int main(int argc, char** argv) { BM_REGISTER(bm_search_CMPH_CHM, 1000 * 1000); // BM_REGISTER(bm_create_CMPH_BRZ, 1000 * 1000); // BM_REGISTER(bm_search_CMPH_BRZ, 1000 * 1000); - BM_REGISTER(bm_create_CMPH_FCH, 1000 * 1000); - BM_REGISTER(bm_search_CMPH_FCH, 1000 * 1000); +// BM_REGISTER(bm_create_CMPH_FCH, 1000 * 1000); +// BM_REGISTER(bm_search_CMPH_FCH, 1000 * 1000); BM_REGISTER(bm_create_CMPH_BDZ, 1000 * 1000); BM_REGISTER(bm_search_CMPH_BDZ, 1000 * 1000); run_benchmarks(argc, argv); diff --git a/src/cmph_benchmark.c b/src/cmph_benchmark.c index a67f78b..0023e2f 100644 --- a/src/cmph_benchmark.c +++ b/src/cmph_benchmark.c @@ -111,8 +111,10 @@ void bm_end(const char* name) { timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime); printf("Benchmark: %s\n", benchmark->name); - printf("User time used : %ld.%06ld\n", utime.tv_sec, utime.tv_usec); - printf("System time used: %ld.%06ld\n", stime.tv_sec, stime.tv_usec); + printf("User time used : %ld.%06ld\n", + (long int)utime.tv_sec, (long int)utime.tv_usec); + printf("System time used: %ld.%06ld\n", + (long int)stime.tv_sec, (long int)stime.tv_usec); printf("\n"); }