Add 'deps/cmph/' from commit 'a250982ade093f4eed0552bbdd22dd7b0432007f'

git-subtree-dir: deps/cmph
git-subtree-mainline: 5040f4007b
git-subtree-split: a250982ade
This commit is contained in:
2023-08-21 13:50:16 +03:00
713 changed files with 29764 additions and 0 deletions

58
deps/cmph/cxxmph/.ycm_extra_conf.py vendored Normal file
View File

@@ -0,0 +1,58 @@
import os
import ycm_core
flags = [
'-Wall',
'-Wextra',
'-Werror',
'-DNDEBUG',
'-DUSE_CLANG_COMPLETER',
'-std=c++11',
'-x',
'c++',
'-isystem'
'/usr/lib/c++/v1',
'-I',
'.',
]
def DirectoryOfThisScript():
return os.path.dirname( os.path.abspath( __file__ ) )
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
if not working_directory:
return list( flags )
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith( '/' ):
new_flag = os.path.join( working_directory, flag )
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
if flag.startswith( path_flag ):
path = flag[ len( path_flag ): ]
new_flag = path_flag + os.path.join( working_directory, path )
break
if new_flag:
new_flags.append( new_flag )
return new_flags
def FlagsForFile( filename ):
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
return {
'flags': final_flags,
'do_cache': True
}

62
deps/cmph/cxxmph/Makefile.am vendored Normal file
View File

@@ -0,0 +1,62 @@
TESTS = $(check_PROGRAMS)
check_PROGRAMS = seeded_hash_test mph_bits_test hollow_iterator_test mph_index_test trigraph_test
if USE_LIBCHECK
check_PROGRAMS += test_test map_tester_test mph_map_test dense_hash_map_test string_util_test
check_LTLIBRARIES = libcxxmph_test.la
endif
if USE_BENCHMARKS
noinst_PROGRAMS = bm_map # bm_index - disabled because of cmph dependency
endif
bin_PROGRAMS = cxxmph
cxxmph_includedir = $(includedir)/cxxmph/
cxxmph_include_HEADERS = mph_bits.h mph_map.h mph_index.h MurmurHash3.h trigraph.h seeded_hash.h stringpiece.h hollow_iterator.h string_util.h
noinst_LTLIBRARIES = libcxxmph_bm.la
lib_LTLIBRARIES = libcxxmph.la
libcxxmph_la_SOURCES = MurmurHash3.cpp trigraph.cc mph_bits.cc mph_index.cc benchmark.h benchmark.cc string_util.cc
libcxxmph_la_LDFLAGS = -version-info 0:0:0
libcxxmph_test_la_SOURCES = test.h test.cc
libcxxmph_test_la_LIBADD = libcxxmph.la
libcxxmph_bm_la_SOURCES = benchmark.h benchmark.cc bm_common.h bm_common.cc
libcxxmph_bm_la_LIBADD = libcxxmph.la
test_test_SOURCES = test_test.cc
test_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)
mph_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)
mph_map_test_SOURCES = mph_map_test.cc
dense_hash_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)
dense_hash_map_test_SOURCES = dense_hash_map_test.cc
mph_index_test_LDADD = libcxxmph.la
mph_index_test_SOURCES = mph_index_test.cc
trigraph_test_LDADD = libcxxmph.la
trigraph_test_SOURCES = trigraph_test.cc
# Bad dependency, do not compile by default.
# bm_index_LDADD = libcxxmph_bm.la -lcmph
# bm_index_SOURCES = bm_index.cc
bm_map_LDADD = libcxxmph_bm.la
bm_map_SOURCES = bm_map.cc
cxxmph_LDADD = libcxxmph.la
cxxmph_SOURCES = cxxmph.cc
hollow_iterator_test_SOURCES = hollow_iterator_test.cc
seeded_hash_test_SOURCES = seeded_hash_test.cc
seeded_hash_test_LDADD = libcxxmph.la
mph_bits_test_SOURCES = mph_bits_test.cc
mph_bits_test_LDADD = libcxxmph.la
string_util_test_SOURCES = string_util_test.cc
string_util_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS)
map_tester_test_SOURCES = map_tester.h map_tester.cc map_tester_test.cc
map_tester_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS)

335
deps/cmph/cxxmph/MurmurHash3.cpp vendored Normal file
View File

@@ -0,0 +1,335 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "MurmurHash3.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define FORCE_INLINE __forceinline
#include <stdlib.h>
#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define FORCE_INLINE __attribute__((always_inline))
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
return (x << r) | (x >> (32 - r));
}
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
return (x << r) | (x >> (64 - r));
}
#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
/*FORCE_INLINE*/ uint32_t getblock ( const uint32_t * p, int i )
{
return p[i];
}
/*FORCE_INLINE*/ uint64_t getblock ( const uint64_t * p, int i )
{
return p[i];
}
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
/*FORCE_INLINE*/ uint32_t fmix ( uint32_t h )
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
//----------
/*FORCE_INLINE*/ uint64_t fmix ( uint64_t k )
{
k ^= k >> 33;
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
k ^= k >> 33;
k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return k;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 4;
uint32_t h1 = seed;
uint32_t c1 = 0xcc9e2d51;
uint32_t c2 = 0x1b873593;
//----------
// body
const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
for(int i = -nblocks; i; i++)
{
uint32_t k1 = getblock(blocks,i);
k1 *= c1;
k1 = ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
uint32_t k1 = 0;
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len;
h1 = fmix(h1);
*(uint32_t*)out = h1;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128 ( const void * key, const int len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint32_t h1 = seed;
uint32_t h2 = seed;
uint32_t h3 = seed;
uint32_t h4 = seed;
uint32_t c1 = 0x239b961b;
uint32_t c2 = 0xab0e9789;
uint32_t c3 = 0x38b34ae5;
uint32_t c4 = 0xa1e38b93;
//----------
// body
const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
for(int i = -nblocks; i; i++)
{
uint32_t k1 = getblock(blocks,i*4+0);
uint32_t k2 = getblock(blocks,i*4+1);
uint32_t k3 = getblock(blocks,i*4+2);
uint32_t k4 = getblock(blocks,i*4+3);
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint32_t k1 = 0;
uint32_t k2 = 0;
uint32_t k3 = 0;
uint32_t k4 = 0;
switch(len & 15)
{
case 15: k4 ^= tail[14] << 16;
case 14: k4 ^= tail[13] << 8;
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
case 12: k3 ^= tail[11] << 24;
case 11: k3 ^= tail[10] << 16;
case 10: k3 ^= tail[ 9] << 8;
case 9: k3 ^= tail[ 8] << 0;
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
case 8: k2 ^= tail[ 7] << 24;
case 7: k2 ^= tail[ 6] << 16;
case 6: k2 ^= tail[ 5] << 8;
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
case 4: k1 ^= tail[ 3] << 24;
case 3: k1 ^= tail[ 2] << 16;
case 2: k1 ^= tail[ 1] << 8;
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
h1 = fmix(h1);
h2 = fmix(h2);
h3 = fmix(h3);
h4 = fmix(h4);
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
((uint32_t*)out)[0] = h1;
((uint32_t*)out)[1] = h2;
((uint32_t*)out)[2] = h3;
((uint32_t*)out)[3] = h4;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
const uint64_t * blocks = (const uint64_t *)(data);
for(int i = 0; i < nblocks; i++)
{
uint64_t k1 = getblock(blocks,i*2+0);
uint64_t k2 = getblock(blocks,i*2+1);
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch(len & 15)
{
case 15: k2 ^= uint64_t(tail[14]) << 48;
case 14: k2 ^= uint64_t(tail[13]) << 40;
case 13: k2 ^= uint64_t(tail[12]) << 32;
case 12: k2 ^= uint64_t(tail[11]) << 24;
case 11: k2 ^= uint64_t(tail[10]) << 16;
case 10: k2 ^= uint64_t(tail[ 9]) << 8;
case 9: k2 ^= uint64_t(tail[ 8]) << 0;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= uint64_t(tail[ 7]) << 56;
case 7: k1 ^= uint64_t(tail[ 6]) << 48;
case 6: k1 ^= uint64_t(tail[ 5]) << 40;
case 5: k1 ^= uint64_t(tail[ 4]) << 32;
case 4: k1 ^= uint64_t(tail[ 3]) << 24;
case 3: k1 ^= uint64_t(tail[ 2]) << 16;
case 2: k1 ^= uint64_t(tail[ 1]) << 8;
case 1: k1 ^= uint64_t(tail[ 0]) << 0;
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix(h1);
h2 = fmix(h2);
h1 += h2;
h2 += h1;
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------

37
deps/cmph/cxxmph/MurmurHash3.h vendored Normal file
View File

@@ -0,0 +1,37 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
//-----------------------------------------------------------------------------
#endif // _MURMURHASH3_H_

142
deps/cmph/cxxmph/benchmark.cc vendored Normal file
View File

@@ -0,0 +1,142 @@
#include "benchmark.h"
#include <cerrno>
#include <cstring>
#include <cstdio>
#include <memory>
#include <sys/time.h>
#include <sys/resource.h>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>
using std::cerr;
using std::cout;
using std::endl;
using std::setfill;
using std::setw;
using std::string;
using std::ostringstream;
using std::vector;
namespace {
/* Subtract the `struct timeval' values X and Y,
storing the result in RESULT.
Return 1 if the difference is negative, otherwise 0. */
int timeval_subtract (
struct timeval *result, struct timeval *x, struct timeval* y) {
/* Perform the carry for the later subtraction by updating y. */
if (x->tv_usec < y->tv_usec) {
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
if (x->tv_usec - y->tv_usec > 1000000) {
int nsec = (x->tv_usec - y->tv_usec) / 1000000;
y->tv_usec += 1000000 * nsec;
y->tv_sec -= nsec;
}
/* Compute the time remaining to wait.
tv_usec is certainly positive. */
result->tv_sec = x->tv_sec - y->tv_sec;
result->tv_usec = x->tv_usec - y->tv_usec;
/* Return 1 if result is negative. */
return x->tv_sec < y->tv_sec;
}
// C++ iostream is terrible for formatting.
string timeval_to_string(timeval tv) {
ostringstream out;
out << setfill(' ') << setw(3) << tv.tv_sec << '.';
out << setfill('0') << setw(6) << tv.tv_usec;
return out.str();
}
struct rusage getrusage_or_die() {
struct rusage rs;
int ret = getrusage(RUSAGE_SELF, &rs);
if (ret != 0) {
cerr << "rusage failed: " << strerror(errno) << endl;
exit(-1);
}
return rs;
}
struct timeval gettimeofday_or_die() {
struct timeval tv;
int ret = gettimeofday(&tv, NULL);
if (ret != 0) {
cerr << "gettimeofday failed: " << strerror(errno) << endl;
exit(-1);
}
return tv;
}
#ifdef HAVE_CXA_DEMANGLE
string demangle(const string& name) {
char buf[1024];
unsigned int size = 1024;
int status;
char* res = abi::__cxa_demangle(
name.c_str(), buf, &size, &status);
return res;
}
#else
string demangle(const string& name) { return name; }
#endif
static vector<cxxmph::Benchmark*> g_benchmarks;
} // anonymous namespace
namespace cxxmph {
/* static */ void Benchmark::Register(Benchmark* bm) {
if (bm->name().empty()) {
string name = demangle(typeid(*bm).name());
bm->set_name(name);
}
g_benchmarks.push_back(bm);
}
/* static */ void Benchmark::RunAll() {
for (uint32_t i = 0; i < g_benchmarks.size(); ++i) {
std::auto_ptr<Benchmark> bm(g_benchmarks[i]);
if (!bm->SetUp()) {
cerr << "Set up phase for benchmark "
<< bm->name() << " failed." << endl;
continue;
}
bm->MeasureRun();
bm->TearDown();
}
}
void Benchmark::MeasureRun() {
struct timeval walltime_begin = gettimeofday_or_die();
struct rusage begin = getrusage_or_die();
Run();
struct rusage end = getrusage_or_die();
struct timeval walltime_end = gettimeofday_or_die();
struct timeval utime;
timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime);
struct timeval stime;
timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime);
struct timeval wtime;
timeval_subtract(&wtime, &walltime_end, &walltime_begin);
cout << "Benchmark: " << name_ << endl;
cout << "CPU User time : " << timeval_to_string(utime) << endl;
cout << "CPU System time: " << timeval_to_string(stime) << endl;
cout << "Wall clock time: " << timeval_to_string(wtime) << endl;
cout << endl;
}
} // namespace cxxmph

32
deps/cmph/cxxmph/benchmark.h vendored Normal file
View File

@@ -0,0 +1,32 @@
#ifndef __CXXMPH_BENCHMARK_H__
#define __CXXMPH_BENCHMARK_H__
#include <string>
#include <typeinfo>
namespace cxxmph {
class Benchmark {
public:
Benchmark() {}
virtual ~Benchmark() {}
const std::string& name() { return name_; }
void set_name(const std::string& name) { name_ = name; }
static void Register(Benchmark* bm);
static void RunAll();
protected:
virtual bool SetUp() { return true; };
virtual void Run() = 0;
virtual bool TearDown() { return true; };
private:
std::string name_;
void MeasureRun();
};
} // namespace cxxmph
#endif

75
deps/cmph/cxxmph/bm_common.cc vendored Normal file
View File

@@ -0,0 +1,75 @@
#include <cmath>
#include <fstream>
#include <limits>
#include <iostream>
#include <set>
#include "bm_common.h"
using std::cerr;
using std::endl;
using std::set;
using std::string;
using std::vector;
namespace cxxmph {
UrlsBenchmark::~UrlsBenchmark() {}
bool UrlsBenchmark::SetUp() {
vector<string> urls;
std::ifstream f(urls_file_.c_str());
if (!f.is_open()) {
cerr << "Failed to open urls file " << urls_file_ << endl;
return false;
}
string buffer;
while(std::getline(f, buffer)) urls.push_back(buffer);
set<string> unique(urls.begin(), urls.end());
if (unique.size() != urls.size()) {
cerr << "Input file has repeated keys." << endl;
return false;
}
urls.swap(urls_);
return true;
}
SearchUrlsBenchmark::~SearchUrlsBenchmark() {}
bool SearchUrlsBenchmark::SetUp() {
if (!UrlsBenchmark::SetUp()) return false;
int32_t miss_ratio_int32 = std::numeric_limits<int32_t>::max() * miss_ratio_;
forced_miss_urls_.resize(nsearches_);
random_.resize(nsearches_);
for (uint32_t i = 0; i < nsearches_; ++i) {
random_[i] = urls_[random() % urls_.size()];
if (random() < miss_ratio_int32) {
forced_miss_urls_[i] = random_[i].as_string() + ".force_miss";
random_[i] = forced_miss_urls_[i];
}
}
return true;
}
Uint64Benchmark::~Uint64Benchmark() {}
bool Uint64Benchmark::SetUp() {
set<uint64_t> unique;
for (uint32_t i = 0; i < count_; ++i) {
uint64_t v;
do { v = random(); } while (unique.find(v) != unique.end());
values_.push_back(v);
unique.insert(v);
}
return true;
}
SearchUint64Benchmark::~SearchUint64Benchmark() {}
bool SearchUint64Benchmark::SetUp() {
if (!Uint64Benchmark::SetUp()) return false;
random_.resize(nsearches_);
for (uint32_t i = 0; i < nsearches_; ++i) {
uint32_t pos = random() % values_.size();
random_[i] = values_[pos];
}
return true;
}
} // namespace cxxmph

73
deps/cmph/cxxmph/bm_common.h vendored Normal file
View File

@@ -0,0 +1,73 @@
#ifndef __CXXMPH_BM_COMMON_H__
#define __CXXMPH_BM_COMMON_H__
#include "stringpiece.h"
#include <string>
#include <vector>
#include <unordered_map> // std::hash
#include "MurmurHash3.h"
#include "benchmark.h"
namespace std {
template <> struct hash<cxxmph::StringPiece> {
uint32_t operator()(const cxxmph::StringPiece& k) const {
uint32_t out;
MurmurHash3_x86_32(k.data(), k.length(), 1, &out);
return out;
}
};
} // namespace std
namespace cxxmph {
class UrlsBenchmark : public Benchmark {
public:
UrlsBenchmark(const std::string& urls_file) : urls_file_(urls_file) { }
virtual ~UrlsBenchmark();
protected:
virtual bool SetUp();
const std::string urls_file_;
std::vector<std::string> urls_;
};
class SearchUrlsBenchmark : public UrlsBenchmark {
public:
SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches, float miss_ratio)
: UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(miss_ratio) {}
virtual ~SearchUrlsBenchmark();
protected:
virtual bool SetUp();
const uint32_t nsearches_;
float miss_ratio_;
std::vector<std::string> forced_miss_urls_;
std::vector<StringPiece> random_;
};
class Uint64Benchmark : public Benchmark {
public:
Uint64Benchmark(uint32_t count) : count_(count) { }
virtual ~Uint64Benchmark();
virtual void Run() {}
protected:
virtual bool SetUp();
const uint32_t count_;
std::vector<uint64_t> values_;
};
class SearchUint64Benchmark : public Uint64Benchmark {
public:
SearchUint64Benchmark(uint32_t count, uint32_t nsearches)
: Uint64Benchmark(count), nsearches_(nsearches) { }
virtual ~SearchUint64Benchmark();
virtual void Run() {};
protected:
virtual bool SetUp();
const uint32_t nsearches_;
std::vector<uint64_t> random_;
};
} // namespace cxxmph
#endif // __CXXMPH_BM_COMMON_H__

149
deps/cmph/cxxmph/bm_index.cc vendored Normal file
View File

@@ -0,0 +1,149 @@
#include <cmph.h>
#include <cstdio>
#include <set>
#include <string>
#include <unordered_map>
#include "bm_common.h"
#include "stringpiece.h"
#include "mph_index.h"
using namespace cxxmph;
using std::string;
using std::unordered_map;
class BM_MPHIndexCreate : public UrlsBenchmark {
public:
BM_MPHIndexCreate(const std::string& urls_file)
: UrlsBenchmark(urls_file) { }
protected:
virtual void Run() {
SimpleMPHIndex<StringPiece> index;
index.Reset(urls_.begin(), urls_.end(), urls_.size());
}
};
class BM_STLIndexCreate : public UrlsBenchmark {
public:
BM_STLIndexCreate(const std::string& urls_file)
: UrlsBenchmark(urls_file) { }
protected:
virtual void Run() {
unordered_map<StringPiece, uint32_t> index;
int idx = 0;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
index.insert(make_pair(*it, idx++));
}
}
};
class BM_MPHIndexSearch : public SearchUrlsBenchmark {
public:
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
virtual void Run() {
uint64_t sum = 0;
for (auto it = random_.begin(); it != random_.end(); ++it) {
auto idx = index_.index(*it);
// Collision check to be fair with STL
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
sum += idx;
}
}
protected:
virtual bool SetUp () {
if (!SearchUrlsBenchmark::SetUp()) return false;
index_.Reset(urls_.begin(), urls_.end(), urls_.size());
return true;
}
SimpleMPHIndex<StringPiece> index_;
};
class BM_CmphIndexSearch : public SearchUrlsBenchmark {
public:
BM_CmphIndexSearch(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
~BM_CmphIndexSearch() { if (index_) cmph_destroy(index_); }
virtual void Run() {
uint64_t sum = 0;
for (auto it = random_.begin(); it != random_.end(); ++it) {
auto idx = cmph_search(index_, it->data(), it->length());
// Collision check to be fair with STL
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
sum += idx;
}
}
protected:
virtual bool SetUp() {
if (!SearchUrlsBenchmark::SetUp()) {
cerr << "Parent class setup failed." << endl;
return false;
}
FILE* f = fopen(urls_file_.c_str(), "r");
if (!f) {
cerr << "Faied to open " << urls_file_ << endl;
return false;
}
cmph_io_adapter_t* source = cmph_io_nlfile_adapter(f);
if (!source) {
cerr << "Faied to create io adapter for " << urls_file_ << endl;
return false;
}
cmph_config_t* config = cmph_config_new(source);
if (!config) {
cerr << "Failed to create config" << endl;
return false;
}
cmph_config_set_algo(config, CMPH_BDZ);
cmph_t* mphf = cmph_new(config);
if (!mphf) {
cerr << "Failed to create mphf." << endl;
return false;
}
cmph_config_destroy(config);
cmph_io_nlfile_adapter_destroy(source);
fclose(f);
index_ = mphf;
return true;
}
cmph_t* index_;
};
class BM_STLIndexSearch : public SearchUrlsBenchmark {
public:
BM_STLIndexSearch(const std::string& urls_file, int nsearches)
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
virtual void Run() {
uint64_t sum = 0;
for (auto it = random_.begin(); it != random_.end(); ++it) {
auto idx = index_.find(*it);
sum += idx->second;
}
}
protected:
virtual bool SetUp () {
if (!SearchUrlsBenchmark::SetUp()) return false;
unordered_map<StringPiece, uint32_t> index;
int idx = 0;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
index.insert(make_pair(*it, idx++));
}
index.swap(index_);
return true;
}
unordered_map<StringPiece, uint32_t> index_;
};
int main(int argc, char** argv) {
Benchmark::Register(new BM_MPHIndexCreate("URLS100k"));
Benchmark::Register(new BM_STLIndexCreate("URLS100k"));
Benchmark::Register(new BM_MPHIndexSearch("URLS100k", 10*1000*1000));
Benchmark::Register(new BM_STLIndexSearch("URLS100k", 10*1000*1000));
Benchmark::Register(new BM_CmphIndexSearch("URLS100k", 10*1000*1000));
Benchmark::RunAll();
return 0;
}

126
deps/cmph/cxxmph/bm_map.cc vendored Normal file
View File

@@ -0,0 +1,126 @@
#include <string>
#include <unordered_map>
#include "hopscotch_map.h"
#include "bm_common.h"
#include "mph_map.h"
using std::string;
// Another reference benchmark:
// http://blog.aggregateknowledge.com/tag/bigmemory/
namespace cxxmph {
template <class MapType, class T>
const T* myfind(const MapType& mymap, const T& k) {
auto it = mymap.find(k);
auto end = mymap.end();
if (it == end) return NULL;
return &it->second;
}
template <class MapType>
class BM_CreateUrls : public UrlsBenchmark {
public:
BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
virtual void Run() {
MapType mymap;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
mymap[*it] = *it;
}
}
};
template <class MapType>
class BM_SearchUrls : public SearchUrlsBenchmark {
public:
BM_SearchUrls(const std::string& urls_file, int nsearches, float miss_ratio)
: SearchUrlsBenchmark(urls_file, nsearches, miss_ratio) { }
virtual ~BM_SearchUrls() {}
virtual void Run() {
uint32_t total = 1;
for (auto it = random_.begin(); it != random_.end(); ++it) {
auto v = myfind(mymap_, *it);
if (v) total += v->length();
}
fprintf(stderr, "Total: %u\n", total);
}
protected:
virtual bool SetUp() {
if (!SearchUrlsBenchmark::SetUp()) return false;
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
mymap_[*it] = *it;
}
mymap_.rehash(mymap_.bucket_count());
fprintf(stderr, "Occupation: %f\n", static_cast<float>(mymap_.size())/mymap_.bucket_count());
return true;
}
MapType mymap_;
};
template <class MapType>
class BM_SearchUint64 : public SearchUint64Benchmark {
public:
BM_SearchUint64() : SearchUint64Benchmark(100000, 10*1000*1000) { }
virtual bool SetUp() {
if (!SearchUint64Benchmark::SetUp()) return false;
for (uint32_t i = 0; i < values_.size(); ++i) {
mymap_[values_[i]] = values_[i];
}
mymap_.rehash(mymap_.bucket_count());
// Double check if everything is all right
cerr << "Doing double check" << endl;
for (uint32_t i = 0; i < values_.size(); ++i) {
if (mymap_[values_[i]] != values_[i]) {
cerr << "Looking for " << i << " th key value " << values_[i];
cerr << " yielded " << mymap_[values_[i]] << endl;
return false;
}
}
return true;
}
virtual void Run() {
for (auto it = random_.begin(); it != random_.end(); ++it) {
auto v = myfind(mymap_, *it);
if (*v != *it) {
cerr << "Looked for " << *it << " got " << *v << endl;
exit(-1);
}
}
}
MapType mymap_;
};
} // namespace cxxmph
using namespace cxxmph;
int main(int argc, char** argv) {
srandom(4);
Benchmark::Register(new BM_CreateUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<std::unordered_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_CreateUrls<tsl::hopscotch_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_SearchUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<std::unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<tsl::hopscotch_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUrls<std::unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUrls<tsl::hopscotch_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUint64<dense_hash_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<std::unordered_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<sparse_hash_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<tsl::hopscotch_map<uint64_t, uint64_t>>);
Benchmark::RunAll();
}

74
deps/cmph/cxxmph/cxxmph.cc vendored Normal file
View File

@@ -0,0 +1,74 @@
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: davi@google.com (Davi Reis)
#include <getopt.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "mph_map.h"
#include "config.h"
using std::cerr;
using std::cout;
using std::endl;
using std::getline;
using std::ifstream;
using std::string;
using std::vector;
using cxxmph::mph_map;
void usage(const char* prg) {
cerr << "usage: " << prg << " [-v] [-h] [-V] <keys.txt>" << endl;
}
void usage_long(const char* prg) {
usage(prg);
cerr << " -h\t print this help message" << endl;
cerr << " -V\t print version number and exit" << endl;
cerr << " -v\t increase verbosity (may be used multiple times)" << endl;
}
int main(int argc, char** argv) {
int verbosity = 0;
while (1) {
char ch = (char)getopt(argc, argv, "hvV");
if (ch == -1) break;
switch (ch) {
case 'h':
usage_long(argv[0]);
return 0;
case 'V':
std::cout << VERSION << std::endl;
return 0;
case 'v':
++verbosity;
break;
}
}
if (optind != argc - 1) {
usage(argv[0]);
return 1;
}
vector<string> keys;
ifstream f(argv[optind]);
if (!f.is_open()) {
std::cerr << "Failed to open " << argv[optind] << std::endl;
exit(-1);
}
string buffer;
while (!getline(f, buffer).eof()) keys.push_back(buffer);
for (uint32_t i = 0; i < keys.size(); ++i) string s = keys[i];
mph_map<string, string> table;
for (uint32_t i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i];
mph_map<string, string>::const_iterator it = table.begin();
mph_map<string, string>::const_iterator end = table.end();
for (int i = 0; it != end; ++it, ++i) {
cout << i << ": " << it->first
<<" -> " << it->second << endl;
}
}

25
deps/cmph/cxxmph/dense_hash_map_test.cc vendored Normal file
View File

@@ -0,0 +1,25 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include "mph_map.h"
#include "map_tester.h"
#include "test.h"
using namespace cxxmph;
typedef MapTester<dense_hash_map> Tester;
CXXMPH_CXX_TEST_CASE(empty_find, Tester::empty_find);
CXXMPH_CXX_TEST_CASE(empty_erase, Tester::empty_erase);
CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);

81
deps/cmph/cxxmph/hollow_iterator.h vendored Normal file
View File

@@ -0,0 +1,81 @@
#ifndef __CXXMPH_HOLLOW_ITERATOR_H__
#define __CXXMPH_HOLLOW_ITERATOR_H__
#include <vector>
namespace cxxmph {
using std::vector;
template <typename container_type>
struct is_empty {
public:
is_empty() : c_(NULL), p_(NULL) {};
is_empty(const container_type* c, const vector<bool>* p) : c_(c), p_(p) {};
bool operator()(typename container_type::const_iterator it) const {
if (it == c_->end()) return false;
return !(*p_)[it - c_->begin()];
}
private:
const container_type* c_;
const vector<bool>* p_;
};
template <typename iterator, typename is_empty>
struct hollow_iterator_base
: public std::iterator<std::forward_iterator_tag,
typename iterator::value_type> {
public:
typedef hollow_iterator_base<iterator, is_empty> self_type;
typedef self_type& self_reference;
typedef typename iterator::reference reference;
typedef typename iterator::pointer pointer;
inline hollow_iterator_base() : it_(), empty_() { }
inline hollow_iterator_base(iterator it, is_empty empty, bool solid) : it_(it), empty_(empty) {
if (!solid) advance();
}
// Same as above, assumes solid==true.
inline hollow_iterator_base(iterator it, is_empty empty) : it_(it), empty_(empty) {}
inline hollow_iterator_base(const self_type& rhs) { it_ = rhs.it_; empty_ = rhs.empty_; }
template <typename const_iterator>
hollow_iterator_base(const hollow_iterator_base<const_iterator, is_empty>& rhs) { it_ = rhs.it_; empty_ = rhs.empty_; }
reference operator*() { return *it_; }
pointer operator->() { return &(*it_); }
self_reference operator++() { ++it_; advance(); return *this; }
// self_type operator++() { auto tmp(*this); ++tmp; return tmp; }
template <typename const_iterator>
bool operator==(const hollow_iterator_base<const_iterator, is_empty>& rhs) { return rhs.it_ == it_; }
template <typename const_iterator>
bool operator!=(const hollow_iterator_base<const_iterator, is_empty>& rhs) { return rhs.it_ != it_; }
// should be friend
iterator it_;
is_empty empty_;
private:
void advance() {
while (empty_(it_)) ++it_;
}
};
template <typename container_type, typename iterator>
inline auto make_solid(
container_type* v, const vector<bool>* p, iterator it) ->
hollow_iterator_base<iterator, is_empty<const container_type>> {
return hollow_iterator_base<iterator, is_empty<const container_type>>(
it, is_empty<const container_type>(v, p));
}
template <typename container_type, typename iterator>
inline auto make_hollow(
container_type* v, const vector<bool>* p, iterator it) ->
hollow_iterator_base<iterator, is_empty<const container_type>> {
return hollow_iterator_base<iterator, is_empty<const container_type>>(
it, is_empty<const container_type>(v, p), false);
}
} // namespace cxxmph
#endif // __CXXMPH_HOLLOW_ITERATOR_H__

View File

@@ -0,0 +1,49 @@
#include <cstdlib>
#include <cstdio>
#include <vector>
#include <iostream>
using std::cerr;
using std::endl;
using std::vector;
#include "hollow_iterator.h"
using cxxmph::hollow_iterator_base;
using cxxmph::make_hollow;
using cxxmph::is_empty;
int main(int, char**) {
vector<int> v;
vector<bool> p;
for (int i = 0; i < 100; ++i) {
v.push_back(i);
p.push_back(i % 2 == 0);
}
auto begin = make_hollow(&v, &p, v.begin());
auto end = make_hollow(&v, &p, v.end());
for (auto it = begin; it != end; ++it) {
if (((*it) % 2) != 0) exit(-1);
}
const vector<int>* cv(&v);
auto cbegin(make_hollow(cv, &p, cv->begin()));
auto cend(make_hollow(cv, &p, cv->begin()));
for (auto it = cbegin; it != cend; ++it) {
if (((*it) % 2) != 0) exit(-1);
}
const vector<bool>* cp(&p);
cbegin = make_hollow(cv, cp, v.begin());
cend = make_hollow(cv, cp, cv->end());
vector<int>::iterator vit1 = v.begin();
vector<int>::const_iterator vit2 = v.begin();
if (vit1 != vit2) exit(-1);
auto it1 = make_hollow(&v, &p, vit1);
auto it2 = make_hollow(&v, &p, vit2);
if (it1 != it2) exit(-1);
typedef is_empty<const vector<int>> iev;
hollow_iterator_base<vector<int>::iterator, iev> default_constructed;
default_constructed = make_hollow(&v, &p, v.begin());
return 0;
}

4
deps/cmph/cxxmph/map_tester.cc vendored Normal file
View File

@@ -0,0 +1,4 @@
#include "map_tester.h"
namespace cxxxmph {
}

138
deps/cmph/cxxmph/map_tester.h vendored Normal file
View File

@@ -0,0 +1,138 @@
#ifndef __CXXMPH_MAP_TEST_HELPER_H__
#define __CXXMPH_MAP_TEST_HELPER_H__
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include <unordered_map>
#include "string_util.h"
#include <check.h>
namespace cxxmph {
using namespace cxxmph;
using namespace std;
template <template<typename...> class map_type>
struct MapTester {
static bool empty_find() {
map_type<int64_t, int64_t> m;
for (int i = 0; i < 1000; ++i) {
if (m.find(i) != m.end()) return false;
}
return true;
}
static bool empty_erase() {
map_type<int64_t, int64_t> m;
for (int i = 0; i < 1000; ++i) {
m.erase(i);
if (m.size()) return false;
}
return true;
}
static bool small_insert() {
map_type<int64_t, int64_t> m;
// Start counting from 1 to not touch default constructed value bugs
for (int i = 1; i < 12; ++i) m.insert(make_pair(i, i));
return m.size() == 11;
}
static bool large_insert() {
map_type<int64_t, int64_t> m;
// Start counting from 1 to not touch default constructed value bugs
int nkeys = 12 * 256 * 256;
for (int i = 1; i < nkeys; ++i) m.insert(make_pair(i, i));
return static_cast<int>(m.size()) == nkeys - 1;
}
static bool small_search() {
map_type<int64_t, int64_t> m;
// Start counting from 1 to not touch default constructed value bugs
for (int i = 1; i < 12; ++i) m.insert(make_pair(i, i));
for (int i = 1; i < 12; ++i) if (m.find(i) == m.end()) return false;
return true;
}
static bool default_search() {
map_type<int64_t, int64_t> m;
if (m.find(0) != m.end()) return false;
for (int i = 1; i < 256; ++i) m.insert(make_pair(i, i));
if (m.find(0) != m.end()) return false;
for (int i = 0; i < 256; ++i) m.insert(make_pair(i, i));
if (m.find(0) == m.end()) return false;
return true;
}
static bool large_search() {
int nkeys = 10 * 1000;
map_type<int64_t, int64_t> m;
for (int i = 0; i < nkeys; ++i) m.insert(make_pair(i, i));
for (int i = 0; i < nkeys; ++i) if (m.find(i) == m.end()) return false;
return true;
}
static bool string_search() {
int nkeys = 10 * 1000;
vector<string> keys;
for (int i = 0; i < nkeys; ++i) {
keys.push_back(format("%v", i));
}
map_type<string, int64_t> m;
for (int i = 0; i < nkeys; ++i) m.insert(make_pair(keys[i], i));
for (int i = 0; i < nkeys; ++i) {
auto it = m.find(keys[i]);
if (it == m.end()) return false;
if (it->second != i) return false;
}
return true;
}
static bool rehash_zero() {
map_type<int64_t, int64_t> m;
m.rehash(0);
return m.size() == 0;
}
static bool rehash_size() {
map_type<int64_t, int64_t> m;
int nkeys = 10 * 1000;
for (int i = 0; i < nkeys; ++i) { m.insert(make_pair(i, i)); }
m.rehash(nkeys);
for (int i = 0; i < nkeys; ++i) { if (m.find(i) == m.end()) return false; }
for (int i = nkeys; i < nkeys * 2; ++i) {
if (m.find(i) != m.end()) return false;
}
return true;
}
static bool erase_iterator() {
map_type<int64_t, int64_t> m;
int nkeys = 10 * 1000;
for (int i = 0; i < nkeys; ++i) { m.insert(make_pair(i, i)); }
for (int i = 0; i < nkeys; ++i) {
if (m.find(i) == m.end()) return false;
}
for (int i = nkeys - 1; i >= 0; --i) { if (m.find(i) == m.end()) return false; }
for (int i = nkeys - 1; i >= 0; --i) {
fail_unless(m.find(i) != m.end(), "after erase %d cannot be found", i);
fail_unless(m.find(i)->first == i, "after erase key %d cannot be found", i);
}
for (int i = nkeys - 1; i >= 0; --i) {
fail_unless(m.find(i) != m.end(), "after erase %d cannot be found", i);
fail_unless(m.find(i)->first == i, "after erase key %d cannot be found", i);
if (!(m.find(i)->first == i)) return false;
m.erase(m.find(i));
if (static_cast<int>(m.size()) != i) return false;
}
return true;
}
static bool erase_value() {
map_type<int64_t, int64_t> m;
int nkeys = 10 * 1000;
for (int i = 0; i < nkeys; ++i) { m.insert(make_pair(i, i)); }
for (int i = nkeys - 1; i >= 0; --i) {
fail_unless(m.find(i) != m.end());
m.erase(i);
if (static_cast<int>(m.size()) != i) return false;
}
return true;
}
};
} // namespace cxxxmph
#endif // __CXXMPH_MAP_TEST_HELPER_H__

17
deps/cmph/cxxmph/map_tester_test.cc vendored Normal file
View File

@@ -0,0 +1,17 @@
#include "map_tester.h"
#include "test.h"
using namespace cxxmph;
typedef MapTester<std::unordered_map> Tester;
CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);

11
deps/cmph/cxxmph/mph_bits.cc vendored Normal file
View File

@@ -0,0 +1,11 @@
#include "mph_bits.h"
namespace cxxmph {
const uint8_t dynamic_2bitset::vmask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
dynamic_2bitset::dynamic_2bitset() : size_(0), fill_(false) {}
dynamic_2bitset::dynamic_2bitset(uint32_t size, bool fill)
: size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {}
dynamic_2bitset::~dynamic_2bitset() {}
}

73
deps/cmph/cxxmph/mph_bits.h vendored Normal file
View File

@@ -0,0 +1,73 @@
#ifndef __CXXMPH_MPH_BITS_H__
#define __CXXMPH_MPH_BITS_H__
#include <stdint.h> // for uint32_t and friends
#include <array>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <limits>
#include <vector>
#include <utility>
namespace cxxmph {
class dynamic_2bitset {
public:
dynamic_2bitset();
~dynamic_2bitset();
dynamic_2bitset(uint32_t size, bool fill = false);
const uint8_t operator[](uint32_t i) const { return get(i); }
const uint8_t get(uint32_t i) const {
assert(i < size());
assert((i >> 2) < data_.size());
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
}
void set(uint32_t i, uint8_t v) {
assert((i >> 2) < data_.size());
data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
assert(v <= 3);
assert(get(i) == v);
}
void resize(uint32_t size) {
size_ = size;
data_.resize(size >> 2, fill_*ones());
}
void swap(dynamic_2bitset& other) {
std::swap(other.size_, size_);
std::swap(other.fill_, fill_);
other.data_.swap(data_);
}
void clear() { data_.clear(); size_ = 0; }
uint32_t size() const { return size_; }
static const uint8_t vmask[];
const std::vector<uint8_t>& data() const { return data_; }
private:
uint32_t size_;
bool fill_;
std::vector<uint8_t> data_;
const uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
};
static uint32_t nextpoweroftwo(uint32_t k) {
if (k == 0) return 1;
k--;
for (uint32_t i=1; i<sizeof(uint32_t)*CHAR_BIT; i<<=1) k = k | k >> i;
return k+1;
}
// Interesting bit tricks that might end up here:
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
// Fast a % (k*2^t)
// http://www.azillionmonkeys.com/qed/adiv.html
// rank and select:
// http://vigna.dsi.unimi.it/ftp/papers/Broadword.pdf
} // namespace cxxmph
#endif

61
deps/cmph/cxxmph/mph_bits_test.cc vendored Normal file
View File

@@ -0,0 +1,61 @@
#include <cstdio>
#include <cstdlib>
#include "mph_bits.h"
using cxxmph::dynamic_2bitset;
using cxxmph::nextpoweroftwo;
int main(int argc, char** argv) {
dynamic_2bitset small(256, true);
for (uint32_t i = 0; i < small.size(); ++i) small.set(i, i % 4);
for (uint32_t i = 0; i < small.size(); ++i) {
if (small[i] != i % 4) {
fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4);
exit(-1);
}
}
uint32_t size = 256;
dynamic_2bitset bits(size, true /* fill with ones */);
for (uint32_t i = 0; i < size; ++i) {
if (bits[i] != 3) {
fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 3);
exit(-1);
}
}
for (uint32_t i = 0; i < size; ++i) bits.set(i, 0);
for (uint32_t i = 0; i < size; ++i) {
if (bits[i] != 0) {
fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 0);
exit(-1);
}
}
for (uint32_t i = 0; i < size; ++i) bits.set(i, i % 4);
for (uint32_t i = 0; i < size; ++i) {
if (bits[i] != i % 4) {
fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, i % 4);
exit(-1);
}
}
dynamic_2bitset size_corner1(1);
if (size_corner1.size() != 1) exit(-1);
dynamic_2bitset size_corner2(2);
if (size_corner2.size() != 2) exit(-1);
(dynamic_2bitset(4, true)).swap(size_corner2);
if (size_corner2.size() != 4) exit(-1);
for (uint32_t i = 0; i < size_corner2.size(); ++i) {
if (size_corner2[i] != 3) exit(-1);
}
size_corner2.clear();
if (size_corner2.size() != 0) exit(-1);
dynamic_2bitset empty;
empty.clear();
dynamic_2bitset large(1000, true);
empty.swap(large);
if (nextpoweroftwo(3) != 4) exit(-1);
}

229
deps/cmph/cxxmph/mph_index.cc vendored Normal file
View File

@@ -0,0 +1,229 @@
#include <limits>
#include <iostream>
#include <vector>
using std::cerr;
using std::endl;
#include "mph_index.h"
using std::vector;
namespace {
static const uint8_t kUnassigned = 3;
// table used for looking up the number of assigned vertices to a 8-bit integer
static uint8_t kBdzLookupIndex[] =
{
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
};
} // anonymous namespace
namespace cxxmph {
MPHIndex::~MPHIndex() {
clear();
}
void MPHIndex::clear() {
std::vector<uint32_t> empty_ranktable;
ranktable_.swap(empty_ranktable);
dynamic_2bitset empty_g;
g_.swap(empty_g);
}
bool MPHIndex::GenerateQueue(
TriGraph* graph, vector<uint32_t>* queue_output) {
uint32_t queue_head = 0, queue_tail = 0;
uint32_t nedges = m_;
uint32_t nvertices = n_;
// Relies on vector<bool> using 1 bit per element
vector<bool> marked_edge(nedges + 1, false);
vector<uint32_t> queue(nvertices, 0);
for (uint32_t i = 0; i < nedges; ++i) {
const TriGraph::Edge& e = graph->edges()[i];
if (graph->vertex_degree()[e[0]] == 1 ||
graph->vertex_degree()[e[1]] == 1 ||
graph->vertex_degree()[e[2]] == 1) {
if (!marked_edge[i]) {
queue[queue_head++] = i;
marked_edge[i] = true;
}
}
}
/*
for (unsigned int i = 0; i < marked_edge.size(); ++i) {
cerr << "vertex with degree " << static_cast<uint32_t>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
}
for (unsigned int i = 0; i < queue.size(); ++i) {
cerr << "vertex " << i << " queued at " << queue[i] << endl;
}
*/
// At this point queue head is the number of edges touching at least one
// vertex of degree 1.
// cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
// graph->DebugGraph();
while (queue_tail != queue_head) {
uint32_t current_edge = queue[queue_tail++];
graph->RemoveEdge(current_edge);
const TriGraph::Edge& e = graph->edges()[current_edge];
for (int i = 0; i < 3; ++i) {
uint32_t v = e[i];
if (graph->vertex_degree()[v] == 1) {
uint32_t first_edge = graph->first_edge()[v];
if (!marked_edge[first_edge]) {
queue[queue_head++] = first_edge;
marked_edge[first_edge] = true;
}
}
}
}
/*
for (unsigned int i = 0; i < queue.size(); ++i) {
cerr << "vertex " << i << " queued at " << queue[i] << endl;
}
*/
int cycles = queue_head - nedges;
if (cycles == 0) queue.swap(*queue_output);
return cycles == 0;
}
void MPHIndex::Assigning(
const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
uint32_t current_edge = 0;
vector<bool> marked_vertices(n_ + 1);
dynamic_2bitset(8, true).swap(g_);
// Initialize vector of half nibbles with all bits set.
dynamic_2bitset g(n_, true /* set bits to 1 */);
uint32_t nedges = m_; // for legibility
for (int i = nedges - 1; i + 1 >= 1; --i) {
current_edge = queue[i];
const TriGraph::Edge& e = edges[current_edge];
/*
cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
<< get_2bit_value(g_, e[0]) << " "
<< get_2bit_value(g_, e[1]) << " "
<< get_2bit_value(g_, e[2]) << " edge " << current_edge << endl;
*/
if (!marked_vertices[e[0]]) {
if (!marked_vertices[e[1]]) {
g.set(e[1], kUnassigned);
marked_vertices[e[1]] = true;
}
if (!marked_vertices[e[2]]) {
g.set(e[2], kUnassigned);
assert(marked_vertices.size() > e[2]);
marked_vertices[e[2]] = true;
}
g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3);
marked_vertices[e[0]] = true;
} else if (!marked_vertices[e[1]]) {
if (!marked_vertices[e[2]]) {
g.set(e[2], kUnassigned);
marked_vertices[e[2]] = true;
}
g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3);
marked_vertices[e[1]] = true;
} else {
g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3);
marked_vertices[e[2]] = true;
}
/*
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
<< static_cast<uint32_t>(g[e[0]]) << " "
<< static_cast<uint32_t>(g[e[1]]) << " "
<< static_cast<uint32_t>(g[e[2]]) << " " << endl;
*/
}
g_.swap(g);
}
void MPHIndex::Ranking() {
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
uint32_t size = k_ >> 2U;
uint32_t ranktable_size = static_cast<uint32_t>(
ceil(n_ / static_cast<double>(k_)));
vector<uint32_t> ranktable(ranktable_size);
uint32_t offset = 0;
uint32_t count = 0;
uint32_t i = 1;
while (1) {
if (i == ranktable.size()) break;
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
for (uint32_t j = 0; j < nbytes; ++j) {
count += kBdzLookupIndex[g_.data()[offset + j]];
}
ranktable[i] = count;
offset += nbytes;
nbytes_total -= size;
++i;
}
ranktable_.swap(ranktable);
}
uint32_t MPHIndex::Rank(uint32_t vertex) const {
if (ranktable_.empty()) return 0;
uint32_t index = vertex >> b_;
uint32_t base_rank = ranktable_[index];
uint32_t beg_idx_v = index << b_;
uint32_t beg_idx_b = beg_idx_v >> 2;
uint32_t end_idx_b = vertex >> 2;
while (beg_idx_b < end_idx_b) {
assert(g_.data().size() > beg_idx_b);
base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]];
}
beg_idx_v = beg_idx_b << 2;
/*
cerr << "beg_idx_v: " << beg_idx_v << endl;
cerr << "base rank: " << base_rank << endl;
cerr << "G: ";
for (unsigned int i = 0; i < n_; ++i) {
cerr << static_cast<uint32_t>(g_[i]) << " ";
}
cerr << endl;
*/
while (beg_idx_v < vertex) {
if (g_[beg_idx_v] != kUnassigned) ++base_rank;
++beg_idx_v;
}
// cerr << "Base rank: " << base_rank << endl;
return base_rank;
}
void MPHIndex::swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable) {
params.resize(12);
uint32_t rounded_c = c_ * 1000 * 1000;
std::swap(params[0], rounded_c);
c_ = static_cast<double>(rounded_c) / 1000 / 1000;
std::swap(params[1], m_);
std::swap(params[2], n_);
std::swap(params[3], k_);
uint32_t uint32_square = static_cast<uint32_t>(square_);
std::swap(params[4], uint32_square);
square_ = uint32_square;
std::swap(params[5], hash_seed_[0]);
std::swap(params[6], hash_seed_[1]);
std::swap(params[7], hash_seed_[2]);
g.swap(g_);
ranktable.swap(ranktable_);
}
} // namespace cxxmph

270
deps/cmph/cxxmph/mph_index.h vendored Normal file
View File

@@ -0,0 +1,270 @@
#ifndef __CXXMPH_MPH_INDEX_H__
#define __CXXMPH_MPH_INDEX_H__
// Minimal perfect hash abstraction implementing the BDZ algorithm
//
// This is a data structure that given a set of known keys S, will create a
// mapping from S to [0..|S|). The class is informed about S through the Reset
// method and the mapping is queried by calling index(key).
//
// This is a pretty uncommon data structure, and if you application has a real
// use case for it, chances are that it is a real win. If all you are doing is
// a straightforward implementation of an in-memory associative mapping data
// structure, then it will probably be slower. Take a look at mph_map.h
// instead.
//
// Thesis presenting this and similar algorithms:
// http://homepages.dcc.ufmg.br/~fbotelho/en/talks/thesis2008/thesis.pdf
//
// Notes:
//
// Most users can use the SimpleMPHIndex wrapper instead of the MPHIndex which
// have confusing template parameters.
// This class only implements a minimal perfect hash function, it does not
// implement an associative mapping data structure.
#include <stdint.h>
#include <cassert>
#include <climits>
#include <cmath>
#include <unordered_map> // for std::hash
#include <vector>
#include <iostream>
using std::cerr;
using std::endl;
#include "seeded_hash.h"
#include "mph_bits.h"
#include "trigraph.h"
namespace cxxmph {
class MPHIndex {
public:
MPHIndex(bool square = false, double c = 1.23, uint8_t b = 7) :
c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true) {
nest_displacement_[0] = 0;
nest_displacement_[1] = r_;
nest_displacement_[2] = (r_ << 1);
}
~MPHIndex();
template <class SeededHashFcn, class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size);
template <class SeededHashFcn, class Key> // must agree with Reset
// Get a unique identifier for k, in the range [0;size()). If x wasn't part
// of the input in the last Reset call, returns a random value.
uint32_t index(const Key& x) const;
uint32_t size() const { return m_; }
void clear();
// Advanced users functions. Please avoid unless you know what you are doing.
uint32_t perfect_hash_size() const { return n_; }
template <class SeededHashFcn, class Key> // must agree with Reset
uint32_t perfect_hash(const Key& x) const; // way faster than the minimal
template <class SeededHashFcn, class Key> // must agree with Reset
uint32_t perfect_square(const Key& x) const; // even faster but needs square=true
uint32_t minimal_perfect_hash_size() const { return size(); }
template <class SeededHashFcn, class Key> // must agree with Reset
uint32_t minimal_perfect_hash(const Key& x) const;
// Experimental api to use as a serialization building block.
// Since this signature exposes some implementation details, expect it to
// change.
void swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable);
private:
template <class SeededHashFcn, class ForwardIterator>
bool Mapping(ForwardIterator begin, ForwardIterator end,
std::vector<TriGraph::Edge>* edges,
std::vector<uint32_t>* queue);
bool GenerateQueue(TriGraph* graph, std::vector<uint32_t>* queue);
void Assigning(const std::vector<TriGraph::Edge>& edges,
const std::vector<uint32_t>& queue);
void Ranking();
uint32_t Rank(uint32_t vertex) const;
// Algorithm parameters
// Perfect hash function density. If this was a 2graph,
// then probability of having an acyclic graph would be
// sqrt(1-(2/c)^2). See section 3 for details.
// http://www.it-c.dk/people/pagh/papers/simpleperf.pdf
double c_;
uint8_t b_; // Number of bits of the kth index in the ranktable
// Values used during generation
uint32_t m_; // edges count
uint32_t n_; // vertex count
uint32_t k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
bool square_; // make bit vector size a power of 2
// Values used during search
// Partition vertex count, derived from c parameter.
uint32_t r_;
uint32_t nest_displacement_[3]; // derived from r_
// The array containing the minimal perfect hash function graph.
dynamic_2bitset g_;
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
// The table used for the rank step of the minimal perfect hash function
std::vector<uint32_t> ranktable_;
// The selected hash seed triplet for finding the edges in the minimal
// perfect hash function graph.
uint32_t hash_seed_[3];
};
// Template method needs to go in the header file.
template <class SeededHashFcn, class ForwardIterator>
bool MPHIndex::Reset(
ForwardIterator begin, ForwardIterator end, uint32_t size) {
if (end == begin) {
clear();
return true;
}
m_ = size;
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
if ((r_ % 2) == 0) r_ += 1;
// This can be used to speed mods, but increases occupation too much.
// Needs to try http://gmplib.org/manual/Integer-Exponentiation.html instead
if (square_) r_ = nextpoweroftwo(r_);
nest_displacement_[0] = 0;
nest_displacement_[1] = r_;
nest_displacement_[2] = (r_ << 1);
for (uint32_t i = 0; i < sizeof(threebit_mod3); ++i) threebit_mod3[i] = i % 3;
n_ = 3*r_;
k_ = 1U << b_;
// cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
int iterations = 1000;
std::vector<TriGraph::Edge> edges;
std::vector<uint32_t> queue;
while (1) {
// cerr << "Iterations missing: " << iterations << endl;
for (int i = 0; i < 3; ++i) hash_seed_[i] = random();
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
else --iterations;
if (iterations == 0) break;
}
if (iterations == 0) return false;
Assigning(edges, queue);
std::vector<TriGraph::Edge>().swap(edges);
Ranking();
return true;
}
template <class SeededHashFcn, class ForwardIterator>
bool MPHIndex::Mapping(
ForwardIterator begin, ForwardIterator end,
std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
TriGraph graph(n_, m_);
for (ForwardIterator it = begin; it != end; ++it) {
h128 h = SeededHashFcn().hash128(*it, hash_seed_[0]);
// for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
uint32_t v0 = h[0] % r_;
uint32_t v1 = h[1] % r_ + r_;
uint32_t v2 = h[2] % r_ + (r_ << 1);
// cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
}
if (GenerateQueue(&graph, queue)) {
graph.ExtractEdgesAndClear(edges);
return true;
}
return false;
}
template <class SeededHashFcn, class Key>
uint32_t MPHIndex::perfect_square(const Key& key) const {
h128 h = SeededHashFcn().hash128(key, hash_seed_[0]);
h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
assert((h[0]) < g_.size());
assert((h[1]) < g_.size());
assert((h[2]) < g_.size());
uint8_t nest = threebit_mod3[g_[h[0]] + g_[h[1]] + g_[h[2]]];
uint32_t vertex = h[nest];
return vertex;
}
template <class SeededHashFcn, class Key>
uint32_t MPHIndex::perfect_hash(const Key& key) const {
if (!g_.size()) return 0;
h128 h = SeededHashFcn().hash128(key, hash_seed_[0]);
h[0] = (h[0] % r_) + nest_displacement_[0];
h[1] = (h[1] % r_) + nest_displacement_[1];
h[2] = (h[2] % r_) + nest_displacement_[2];
assert((h[0]) < g_.size());
assert((h[1]) < g_.size());
assert((h[2]) < g_.size());
uint8_t nest = threebit_mod3[g_[h[0]] + g_[h[1]] + g_[h[2]]];
uint32_t vertex = h[nest];
return vertex;
}
template <class SeededHashFcn, class Key>
uint32_t MPHIndex::minimal_perfect_hash(const Key& key) const {
return Rank(perfect_hash<SeededHashFcn, Key>(key));
}
template <class SeededHashFcn, class Key>
uint32_t MPHIndex::index(const Key& key) const {
return minimal_perfect_hash<SeededHashFcn, Key>(key);
}
// Simple wrapper around MPHIndex to simplify calling code. Please refer to the
// MPHIndex class for documentation.
template <class Key, class HashFcn = typename seeded_hash<std::hash<Key>>::hash_function>
class SimpleMPHIndex : public MPHIndex {
public:
SimpleMPHIndex(bool advanced_usage = false) : MPHIndex(advanced_usage) {}
template <class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size) {
return MPHIndex::Reset<HashFcn>(begin, end, size);
}
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
};
// The parameters minimal and square trade memory usage for evaluation speed.
// Minimal decreases speed and memory usage, and square does the opposite.
// Using minimal=true and square=false is the same as SimpleMPHIndex.
template <bool minimal, bool square, class Key, class HashFcn>
struct FlexibleMPHIndex {};
template <class Key, class HashFcn>
struct FlexibleMPHIndex<true, false, Key, HashFcn>
: public SimpleMPHIndex<Key, HashFcn> {
FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(false) {}
uint32_t index(const Key& key) const {
return MPHIndex::minimal_perfect_hash<HashFcn>(key); }
uint32_t size() const { return MPHIndex::minimal_perfect_hash_size(); }
};
template <class Key, class HashFcn>
struct FlexibleMPHIndex<false, true, Key, HashFcn>
: public SimpleMPHIndex<Key, HashFcn> {
FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(true) {}
uint32_t index(const Key& key) const {
return MPHIndex::perfect_square<HashFcn>(key); }
uint32_t size() const { return MPHIndex::perfect_hash_size(); }
};
template <class Key, class HashFcn>
struct FlexibleMPHIndex<false, false, Key, HashFcn>
: public SimpleMPHIndex<Key, HashFcn> {
FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(false) {}
uint32_t index(const Key& key) const {
return MPHIndex::perfect_hash<HashFcn>(key); }
uint32_t size() const { return MPHIndex::perfect_hash_size(); }
};
// From a trade-off perspective this case does not make much sense.
// template <class Key, class HashFcn>
// class FlexibleMPHIndex<true, true, Key, HashFcn>
} // namespace cxxmph
#endif // __CXXMPH_MPH_INDEX_H__

53
deps/cmph/cxxmph/mph_index_test.cc vendored Normal file
View File

@@ -0,0 +1,53 @@
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>
#include "mph_index.h"
using std::string;
using std::vector;
using namespace cxxmph;
int main(int argc, char** argv) {
srand(1);
vector<string> keys;
keys.push_back("davi");
keys.push_back("paulo");
keys.push_back("joao");
keys.push_back("maria");
keys.push_back("bruno");
keys.push_back("paula");
keys.push_back("diego");
keys.push_back("diogo");
keys.push_back("algume");
SimpleMPHIndex<string> mph_index;
if (!mph_index.Reset(keys.begin(), keys.end(), keys.size())) { exit(-1); }
vector<int> ids;
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
ids.push_back(mph_index.index(keys[i]));
cerr << " " << *(ids.end() - 1);
}
cerr << endl;
sort(ids.begin(), ids.end());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
// Test serialization
vector<uint32_t> params;
dynamic_2bitset g;
vector<uint32_t> ranktable;
mph_index.swap(params, g, ranktable);
assert(mph_index.size() == 0);
mph_index.swap(params, g, ranktable);
assert(mph_index.size() == ids.size());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
FlexibleMPHIndex<false, true, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> square_empty;
auto id = square_empty.index(1);
FlexibleMPHIndex<false, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> unordered_empty;
id ^= unordered_empty.index(1);
FlexibleMPHIndex<true, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> minimal_empty;
id ^= minimal_empty.index(1);
}

272
deps/cmph/cxxmph/mph_map.h vendored Normal file
View File

@@ -0,0 +1,272 @@
#ifndef __CXXMPH_MPH_MAP_H__
#define __CXXMPH_MPH_MAP_H__
// Implementation of the unordered associative mapping interface using a
// minimal perfect hash function.
//
// Since these are header-mostly libraries, make sure you compile your code
// with -DNDEBUG and -O3. The code requires a modern C++11 compiler.
//
// The container comes in 3 flavors, all in the cxxmph namespace and drop-in
// replacement for the popular classes with the same names.
// * dense_hash_map
// -> fast, uses more memory, 2.93 bits per bucket, ~50% occupation
// * unordered_map (aliases: hash_map, mph_map)
// -> middle ground, uses 2.93 bits per bucket, ~81% occupation
// * sparse_hash_map -> slower, uses 3.6 bits per bucket
// -> less fast, uses 3.6 bits per bucket, 100% occupation
//
// Those classes are not necessarily faster than their existing counterparts.
// Benchmark your code before using it. The larger the key, the larger the
// number of elements inserted, and the bigger the number of failed searches,
// the more likely those classes will outperform existing code.
//
// For large sets of urls (>100k), which are a somewhat expensive to compare, I
// found those class to be about 10%-50% faster than unordered_map.
#include <algorithm>
#include <iostream>
#include <limits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <utility> // for std::pair
#include "string_util.h"
#include "hollow_iterator.h"
#include "mph_bits.h"
#include "mph_index.h"
#include "seeded_hash.h"
namespace cxxmph {
using std::pair;
using std::make_pair;
using std::vector;
// Save on repetitive typing.
#define MPH_MAP_TMPL_SPEC \
template <bool minimal, bool square, \
class Key, class Data, class HashFcn, class EqualKey, class Alloc>
#define MPH_MAP_CLASS_SPEC mph_map_base<minimal, square, Key, Data, HashFcn, EqualKey, Alloc>
#define MPH_MAP_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
#define MPH_MAP_INLINE_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC inline typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
template <bool minimal, bool square, class Key, class Data, class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>, class Alloc = std::allocator<Data> >
class mph_map_base {
public:
typedef Key key_type;
typedef Data data_type;
typedef pair<Key, Data> value_type;
typedef HashFcn hasher;
typedef EqualKey key_equal;
typedef typename vector<value_type>::pointer pointer;
typedef typename vector<value_type>::reference reference;
typedef typename vector<value_type>::const_reference const_reference;
typedef typename vector<value_type>::size_type size_type;
typedef typename vector<value_type>::difference_type difference_type;
typedef is_empty<const vector<value_type>> is_empty_type;
typedef hollow_iterator_base<typename vector<value_type>::iterator, is_empty_type> iterator;
typedef hollow_iterator_base<typename vector<value_type>::const_iterator, is_empty_type> const_iterator;
// For making macros simpler.
typedef void void_type;
typedef bool bool_type;
typedef pair<iterator, bool> insert_return_type;
mph_map_base();
~mph_map_base();
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
size_type size() const;
bool empty() const;
void clear();
void erase(iterator pos);
void erase(const key_type& k);
pair<iterator, bool> insert(const value_type& x);
inline iterator find(const key_type& k);
inline const_iterator find(const key_type& k) const;
typedef int32_t my_int32_t; // help macros
inline int32_t index(const key_type& k) const;
data_type& operator[](const key_type &k);
const data_type& operator[](const key_type &k) const;
size_type bucket_count() const { return index_.size() + slack_.bucket_count(); }
void rehash(size_type nbuckets /*ignored*/);
protected: // mimicking STL implementation
EqualKey equal_;
private:
template <typename iterator>
struct iterator_first : public iterator {
iterator_first(iterator it) : iterator(it) { }
const typename iterator::value_type::first_type& operator*() {
return this->iterator::operator*().first;
}
};
template <typename iterator>
iterator_first<iterator> make_iterator_first(iterator it) {
return iterator_first<iterator>(it);
}
void pack();
vector<value_type> values_;
vector<bool> present_;
FlexibleMPHIndex<minimal, square, Key, typename seeded_hash<HashFcn>::hash_function> index_;
// TODO(davi) optimize slack to use hash from index rather than calculate its own
typedef std::unordered_map<h128, uint32_t, h128::hash32> slack_type;
slack_type slack_;
size_type size_;
typename seeded_hash<HashFcn>::hash_function hasher128_;
};
MPH_MAP_TMPL_SPEC
bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
}
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map_base() : size_(0) {
clear();
pack();
}
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map_base() { }
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
auto it = find(x.first);
auto it_end = end();
if (it != it_end) return make_pair(it, false);
bool should_pack = false;
if (values_.capacity() == values_.size() && values_.size() > 256) {
should_pack = true;
}
values_.push_back(x);
present_.push_back(true);
++size_;
h128 h = hasher128_.hash128(x.first, 0);
if (slack_.find(h) != slack_.end()) should_pack = true; // unavoidable pack
else slack_.insert(std::make_pair(h, values_.size() - 1));
if (should_pack) pack();
it = find(x.first);
return make_pair(it, true);
}
MPH_MAP_METHOD_DECL(void_type, pack)() {
// CXXMPH_DEBUGLN("Packing %v values")(values_.size());
if (values_.empty()) return;
assert(std::unordered_set<key_type>(make_iterator_first(begin()), make_iterator_first(end())).size() == size());
bool success = index_.Reset(
make_iterator_first(begin()),
make_iterator_first(end()), size_);
if (!success) { exit(-1); }
vector<value_type> new_values(index_.size());
new_values.reserve(new_values.size() * 2);
vector<bool> new_present(index_.size(), false);
new_present.reserve(new_present.size() * 2);
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
size_type id = index_.index(it->first);
assert(id < index_.size());
assert(id < new_values.size());
new_values[id] = *it;
new_present[id] = true;
}
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
values_.swap(new_values);
present_.swap(new_present);
slack_type().swap(slack_);
}
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_hollow(&values_, &present_, values_.begin()); }
MPH_MAP_METHOD_DECL(iterator, end)() { return make_solid(&values_, &present_, values_.end()); }
MPH_MAP_METHOD_DECL(const_iterator, begin)() const { return make_hollow(&values_, &present_, values_.begin()); }
MPH_MAP_METHOD_DECL(const_iterator, end)() const { return make_solid(&values_, &present_, values_.end()); }
MPH_MAP_METHOD_DECL(bool_type, empty)() const { return size_ == 0; }
MPH_MAP_METHOD_DECL(size_type, size)() const { return size_; }
MPH_MAP_METHOD_DECL(void_type, clear)() {
values_.clear();
present_.clear();
slack_.clear();
index_.clear();
size_ = 0;
}
MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
assert(pos.it_ - values_.begin() < present_.size());
assert(present_[pos.it_ - values_.begin()]);
present_[pos.it_ - values_.begin()] = false;
*pos = value_type();
--size_;
}
MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
iterator it = find(k);
if (it == end()) return;
erase(it);
}
MPH_MAP_INLINE_METHOD_DECL(const_iterator, find)(const key_type& k) const {
auto idx = index(k);
typename vector<value_type>::const_iterator vit = values_.begin() + idx;
if (idx == -1 || !equal_(vit->first, k)) return end();
return make_solid(&values_, &present_, vit);;
}
MPH_MAP_INLINE_METHOD_DECL(iterator, find)(const key_type& k) {
auto idx = index(k);
typename vector<value_type>::iterator vit = values_.begin() + idx;
if (idx == -1 || !equal_(vit->first, k)) return end();
return make_solid(&values_, &present_, vit);;
}
MPH_MAP_INLINE_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
if (__builtin_expect(!slack_.empty(), 0)) {
auto sit = slack_.find(hasher128_.hash128(k, 0));
if (sit != slack_.end()) return sit->second;
}
if (__builtin_expect(index_.size(), 1)) {
auto id = index_.index(k);
if (__builtin_expect(present_[id], true)) return id;
}
return -1;
}
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
return insert(make_pair(k, data_type())).first->second;
}
MPH_MAP_METHOD_DECL(void_type, rehash)(size_type /*nbuckets*/) {
pack();
vector<value_type>(values_.begin(), values_.end()).swap(values_);
vector<bool>(present_.begin(), present_.end()).swap(present_);
slack_type().swap(slack_);
}
#define MPH_MAP_PREAMBLE template <class Key, class Data,\
class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>,\
class Alloc = std::allocator<Data> >
MPH_MAP_PREAMBLE class mph_map : public mph_map_base<
false, false, Key, Data, HashFcn, EqualKey, Alloc> {};
MPH_MAP_PREAMBLE class unordered_map : public mph_map_base<
false, false, Key, Data, HashFcn, EqualKey, Alloc> {};
MPH_MAP_PREAMBLE class hash_map : public mph_map_base<
false, false, Key, Data, HashFcn, EqualKey, Alloc> {};
MPH_MAP_PREAMBLE class dense_hash_map : public mph_map_base<
false, true, Key, Data, HashFcn, EqualKey, Alloc> {};
MPH_MAP_PREAMBLE class sparse_hash_map : public mph_map_base<
true, false, Key, Data, HashFcn, EqualKey, Alloc> {};
#undef MPH_MAP_TMPL_SPEC
#undef MPH_MAP_CLASS_SPEC
#undef MPH_MAP_METHOD_DECL
#undef MPH_MAP_INLINE_METHOD_DECL
#undef MPH_MAP_PREAMBLE
} // namespace cxxmph
#endif // __CXXMPH_MPH_MAP_H__

25
deps/cmph/cxxmph/mph_map_test.cc vendored Normal file
View File

@@ -0,0 +1,25 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include "mph_map.h"
#include "map_tester.h"
#include "test.h"
using namespace cxxmph;
typedef MapTester<mph_map> Tester;
CXXMPH_CXX_TEST_CASE(empty_find, Tester::empty_find);
CXXMPH_CXX_TEST_CASE(empty_erase, Tester::empty_erase);
CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);

147
deps/cmph/cxxmph/seeded_hash.h vendored Normal file
View File

@@ -0,0 +1,147 @@
#ifndef __CXXMPH_SEEDED_HASH_H__
#define __CXXMPH_SEEDED_HASH_H__
#include <stdint.h> // for uint32_t and friends
#include <cstdlib>
#include <cstring>
#include <unordered_map> // for std::hash
#include "MurmurHash3.h"
#include "stringpiece.h"
namespace cxxmph {
struct h128 {
const uint32_t& operator[](uint8_t i) const { return uint32[i]; }
uint32_t& operator[](uint8_t i) { return uint32[i]; }
uint64_t get64(bool second) const { return (static_cast<uint64_t>(uint32[second << 1]) << 32) | uint32[1 + (second << 1)]; }
void set64(uint64_t v, bool second) { uint32[second << 1] = v >> 32; uint32[1+(second<<1)] = ((v << 32) >> 32); }
bool operator==(const h128 rhs) const { return memcmp(uint32, rhs.uint32, sizeof(uint32)) == 0; }
uint32_t uint32[4];
struct hash32 { uint32_t operator()(const cxxmph::h128& h) const { return h[3]; } };
};
template <class HashFcn>
struct seeded_hash_function {
template <class Key>
uint32_t operator()(const Key& k, uint32_t seed) const {
uint32_t h;
uint32_t h0 = HashFcn()(k);
MurmurHash3_x86_32(reinterpret_cast<const void*>(&h0), 4, seed, &h);
return h;
}
template <class Key>
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
uint32_t h0 = HashFcn()(k);
MurmurHash3_x64_128(reinterpret_cast<const void*>(&h0), 4, seed, &h);
return h;
}
};
struct Murmur3 {
template<class Key>
uint32_t operator()(const Key& k) const {
uint32_t out;
MurmurHash3_x86_32(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */, &out);
return out;
}
template <class Key>
h128 hash128(const Key& k) const {
h128 h;
MurmurHash3_x64_128(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */, &h);
return h;
}
};
struct Murmur3StringPiece {
template <class Key>
uint32_t operator()(const Key& k) const {
StringPiece s(k);
uint32_t out;
MurmurHash3_x86_32(s.data(), s.length(), 1 /* seed */, &out);
return out;
}
template <class Key>
h128 hash128(const Key& k) const {
h128 h;
StringPiece s(k);
MurmurHash3_x64_128(s.data(), s.length(), 1 /* seed */, &h);
return h;
}
};
template <>
struct seeded_hash_function<Murmur3> {
template <class Key>
uint32_t operator()(const Key& k, uint32_t seed) const {
uint32_t out;
MurmurHash3_x86_32(reinterpret_cast<const void*>(&k), sizeof(Key), seed, &out);
return out;
}
template <class Key>
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
MurmurHash3_x64_128(reinterpret_cast<const void*>(&k), sizeof(Key), seed, &h);
return h;
}
};
template <>
struct seeded_hash_function<Murmur3StringPiece> {
template <class Key>
uint32_t operator()(const Key& k, uint32_t seed) const {
StringPiece s(k);
uint32_t out;
MurmurHash3_x86_32(s.data(), s.length(), seed, &out);
return out;
}
template <class Key>
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
StringPiece s(k);
MurmurHash3_x64_128(s.data(), s.length(), seed, &h);
return h;
}
};
template <class HashFcn> struct seeded_hash
{ typedef seeded_hash_function<HashFcn> hash_function; };
// Use Murmur3 instead for all types defined in std::hash, plus
// std::string which is commonly extended.
template <> struct seeded_hash<std::hash<char*> >
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
template <> struct seeded_hash<std::hash<const char*> >
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
template <> struct seeded_hash<std::hash<std::string> >
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
template <> struct seeded_hash<std::hash<cxxmph::StringPiece> >
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
template <> struct seeded_hash<std::hash<char> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<unsigned char> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<short> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<unsigned short> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<int> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<unsigned int> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<long> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<unsigned long> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<long long> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
template <> struct seeded_hash<std::hash<unsigned long long> >
{ typedef seeded_hash_function<Murmur3> hash_function; };
} // namespace cxxmph
#endif // __CXXMPH_SEEDED_HASH_H__

59
deps/cmph/cxxmph/seeded_hash_test.cc vendored Normal file
View File

@@ -0,0 +1,59 @@
#include "seeded_hash.h"
#include <unordered_map>
#include <string>
#include <iostream>
using std::cerr;
using std::endl;
using std::string;
using std::unordered_map;
using namespace cxxmph;
int main(int argc, char** argv) {
auto hasher = seeded_hash_function<Murmur3StringPiece>();
string key1("0");
string key2("1");
auto h1 = hasher.hash128(key1, 1);
auto h2 = hasher.hash128(key2, 1);
if (h1 == h2) {
fprintf(stderr, "unexpected murmur collision\n");
exit(-1);
}
unordered_map<uint64_t, int> g;
for (int i = 0; i < 1000; ++i) g[i] = i;
for (int i = 0; i < 1000; ++i) if (g[i] != i) exit(-1);
auto inthasher = seeded_hash_function<std::hash<uint64_t>>();
unordered_map<h128, uint64_t, h128::hash32> g2;
for (uint64_t i = 0; i < 1000; ++i) {
auto h = inthasher.hash128(i, 0);
if (g2.find(h) != g2.end()) {
std::cerr << "Incorrectly found " << i << std::endl;
exit(-1);
}
if (h128::hash32()(h) != h[3]) {
cerr << "Buggy hash method." << endl;
exit(-1);
}
auto h2 = inthasher.hash128(i, 0);
if (!(h == h2)) {
cerr << "h 64(0) " << h.get64(0) << " h 64(1) " << h.get64(1) << endl;
cerr << " h2 64(0) " << h2.get64(0) << " h2 64(1) " << h2.get64(1) << endl;
cerr << "Broken equality for h128" << endl;
exit(-1);
}
if (h128::hash32()(h) != h128::hash32()(h2)) {
cerr << "Inconsistent hash method." << endl;
exit(-1);
}
g2[h] = i;
if (g2.find(h) == g2.end()) {
std::cerr << "Incorrectly missed " << i << std::endl;
exit(-1);
}
}
for (uint64_t i = 0; i < 1000; ++i) if (g2[inthasher.hash128(i, 0)] != i) exit(-1);
}

23
deps/cmph/cxxmph/string_util.cc vendored Normal file
View File

@@ -0,0 +1,23 @@
#include "string_util.h"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
using namespace std;
namespace cxxmph {
bool stream_printf(
const std::string& format_string, uint32_t offset, std::ostream* out) {
if (offset == format_string.length()) return true;
assert(offset < format_string.length());
cerr << "length:" << format_string.length() << endl;
cerr << "offset:" << offset << endl;
auto txt = format_string.substr(offset, format_string.length() - offset);
*out << txt;
return true;
}
} // namespace cxxmph

133
deps/cmph/cxxmph/string_util.h vendored Normal file
View File

@@ -0,0 +1,133 @@
#ifndef __CXXMPH_STRING_UTIL_H__
#define __CXXMPH_STRING_UTIL_H__
// Helper functions for string formatting and terminal output. Should be used
// only for debugging and tests, since performance was not a concern.
// Implemented using variadic templates because it is cool.
//
// Adds the extra format %v to the printf formatting language. Uses the method
// cxxmph::tostr to implement custom printers and fallback to operator
// ostream::operator<< otherwise.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <sstream>
#include <utility>
#include <vector>
#define CXXMPH_DEBUGLN(fmt) variadic_print(__FILE__, __LINE__, &std::cerr, fmt)
#define CXXMPH_INFOLN(fmt) variadic_print(__FILE__, __LINE__, &std::cout, fmt)
namespace cxxmph {
using std::pair;
using std::string;
using std::ostream;
using std::vector;
template <class T> void tostr(ostream *out, const T& v) {
*out << v;
}
inline void tostr(std::ostream* out, uint8_t v) {
*out << static_cast<uint32_t>(v);
}
template <class V>
inline void tostr(ostream* out, const vector<V>& v) {
*out << "[";
for (uint32_t i = 0; i < v.size(); ++i) {
tostr(out, v[1]);
if (i != v.size() - 1)*out << " ";
}
*out << "]";
}
template <class F, class S>
inline void tostr(ostream* out, const pair<F, S>& v) {
*out << "(";
tostr(out, v.first);
*out << ",";
tostr(out, v.second);
*out << ")";
}
bool stream_printf(
const std::string& format_string, uint32_t offset, std::ostream* out);
template <bool ispod> struct pod_snprintf {};
template <> struct pod_snprintf<false> {
template <class T>
int operator()(char*, size_t, const char*, const T&) {
return -1;
}
};
template <> struct pod_snprintf<true> {
template <class T>
int operator()(char* str, size_t size, const char* format, const T& v) {
return snprintf(str, size, format, v);
}
};
template <typename T, typename... Args>
bool stream_printf(const std::string& format_string, uint32_t offset,
std::ostream* out, const T& value, Args&&... args) {
auto txt = format_string.c_str() + offset;
while (*txt) {
auto b = txt;
for (; *txt != '%'; ++txt);
if (*(txt + 1) == '%') ++txt;
else if (txt == b) break;
*out << string(b, txt - b);
if (*(txt - 1) == '%') ++txt;
}
auto fmt = txt + 1;
while (*fmt && *fmt != '%') ++fmt;
if (strncmp(txt, "%v", 2) == 0) {
txt += 2;
tostr(out, value);
if (txt != fmt) *out << string(txt, fmt);
} else {
char buf[256]; // Is this enough?
auto n = pod_snprintf<std::is_pod<T>::value>()(
buf, 256, std::string(txt, fmt).c_str(), value);
if (n < 0) return false;
*out << buf;
}
return stream_printf(format_string, fmt - format_string.c_str(), out,
std::forward<Args>(args)...);
}
template <typename... Args>
std::string format(const std::string& format_string, Args&&... args) {
std::ostringstream out;
if (!stream_printf(format_string, 0, &out, std::forward<Args>(args)...)) {
return std::string();
};
return out.str();
}
template <typename... Args>
void infoln(const std::string& format_string, Args&&... args) {
stream_printf(format_string + "\n", 0, &std::cout, std::forward<Args>(args)...);
}
struct variadic_print {
variadic_print(const std::string& file, uint32_t line, std::ostream* out,
const std::string& format_line)
: file_(file), line_(line), out_(out), format_line_(format_line) {}
template <typename... Args>
void operator()(Args&&... args) {
std::string fancy_format = "%v:%d: ";
fancy_format += format_line_ + "\n";
stream_printf(fancy_format, 0, out_, file_, line_, std::forward<Args>(args)...);
}
const std::string& file_;
const uint32_t& line_;
std::ostream* out_;
const std::string& format_line_;
};
} // namespace cxxmph
#endif // __CXXMPH_STRING_UTIL_H__

27
deps/cmph/cxxmph/string_util_test.cc vendored Normal file
View File

@@ -0,0 +1,27 @@
#include "string_util.h"
#include "test.h"
using namespace cxxmph;
bool test_format() {
string expected = " %% 4 foo 0x0A bar ";
string foo = "foo";
string fmt = format(" %%%% %v %v 0x%.2X bar ", 4, foo, 10);
fail_unless(fmt == expected, "expected\n-%s-\n got \n-%s-", expected.c_str(), fmt.c_str());
return true;
}
bool test_infoln() {
infoln(string("%s:%d: MY INFO LINE"), __FILE__, __LINE__);
return true;
}
bool test_macro() {
CXXMPH_DEBUGLN("here i am")();
return true;
}
CXXMPH_TEST_CASE(test_format)
CXXMPH_TEST_CASE(test_infoln)
CXXMPH_TEST_CASE(test_macro)

182
deps/cmph/cxxmph/stringpiece.h vendored Normal file
View File

@@ -0,0 +1,182 @@
// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// A string-like object that points to a sized piece of memory.
//
// Functions or methods may use const StringPiece& parameters to accept either
// a "const char*" or a "string" value that will be implicitly converted to
// a StringPiece. The implicit conversion means that it is often appropriate
// to include this .h file in other files rather than forward-declaring
// StringPiece as would be appropriate for most other Google classes.
//
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
// conversions from "const char*" to "string" and back again.
//
//
// Arghh! I wish C++ literals were "string".
#ifndef CXXMPH_STRINGPIECE_H__
#define CXXMPH_STRINGPIECE_H__
#include <cstddef>
#include <string.h>
#include <iosfwd>
#include <string>
namespace cxxmph {
class StringPiece {
private:
const char* ptr_;
int length_;
public:
// We provide non-explicit singleton constructors so users can pass
// in a "const char*" or a "string" wherever a "StringPiece" is
// expected.
StringPiece() : ptr_(NULL), length_(0) { }
StringPiece(const char* str)
: ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
StringPiece(const std::string& str)
: ptr_(str.data()), length_(static_cast<int>(str.size())) { }
StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }
// data() may return a pointer to a buffer with embedded NULs, and the
// returned buffer may or may not be null terminated. Therefore it is
// typically a mistake to pass data() to a routine that expects a NUL
// terminated string.
const char* data() const { return ptr_; }
int size() const { return length_; }
int length() const { return length_; }
bool empty() const { return length_ == 0; }
void clear() { ptr_ = NULL; length_ = 0; }
void set(const char* data, int len) { ptr_ = data; length_ = len; }
void set(const char* str) {
ptr_ = str;
if (str != NULL)
length_ = static_cast<int>(strlen(str));
else
length_ = 0;
}
void set(const void* data, int len) {
ptr_ = reinterpret_cast<const char*>(data);
length_ = len;
}
char operator[](int i) const { return ptr_[i]; }
void remove_prefix(int n) {
ptr_ += n;
length_ -= n;
}
void remove_suffix(int n) {
length_ -= n;
}
int compare(const StringPiece& x) const {
int r = memcmp(ptr_, x.ptr_, std::min(length_, x.length_));
if (r == 0) {
if (length_ < x.length_) r = -1;
else if (length_ > x.length_) r = +1;
}
return r;
}
std::string as_string() const {
return std::string(data(), size());
}
// We also define ToString() here, since many other string-like
// interfaces name the routine that converts to a C++ string
// "ToString", and it's confusing to have the method that does that
// for a StringPiece be called "as_string()". We also leave the
// "as_string()" method defined here for existing code.
std::string ToString() const {
return std::string(data(), size());
}
void CopyToString(std::string* target) const;
void AppendToString(std::string* target) const;
// Does "this" start with "x"
bool starts_with(const StringPiece& x) const {
return ((length_ >= x.length_) &&
(memcmp(ptr_, x.ptr_, x.length_) == 0));
}
// Does "this" end with "x"
bool ends_with(const StringPiece& x) const {
return ((length_ >= x.length_) &&
(memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
}
// standard STL container boilerplate
typedef char value_type;
typedef const char* pointer;
typedef const char& reference;
typedef const char& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
static const size_type npos;
typedef const char* const_iterator;
typedef const char* iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
iterator begin() const { return ptr_; }
iterator end() const { return ptr_ + length_; }
const_reverse_iterator rbegin() const {
return const_reverse_iterator(ptr_ + length_);
}
const_reverse_iterator rend() const {
return const_reverse_iterator(ptr_);
}
// STLS says return size_type, but Google says return int
int max_size() const { return length_; }
int capacity() const { return length_; }
int copy(char* buf, size_type n, size_type pos = 0) const;
int find(const StringPiece& s, size_type pos = 0) const;
int find(char c, size_type pos = 0) const;
int rfind(const StringPiece& s, size_type pos = npos) const;
int rfind(char c, size_type pos = npos) const;
StringPiece substr(size_type pos, size_type n = npos) const;
};
inline bool operator==(const StringPiece& x, const StringPiece& y) {
return x.length() == y.length() && memcmp(x.data(), y.data(), x.length()) == 0;
}
inline bool operator!=(const StringPiece& x, const StringPiece& y) {
return !(x == y);
}
inline bool operator<(const StringPiece& x, const StringPiece& y) {
const int r = memcmp(x.data(), y.data(),
std::min(x.size(), y.size()));
return ((r < 0) || ((r == 0) && (x.size() < y.size())));
}
inline bool operator>(const StringPiece& x, const StringPiece& y) {
return y < x;
}
inline bool operator<=(const StringPiece& x, const StringPiece& y) {
return !(x > y);
}
inline bool operator>=(const StringPiece& x, StringPiece& y) {
return !(x < y);
}
} // namespace cxxmph
// allow StringPiece to be logged
inline std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece) {
o << piece.as_string(); return o;
}
#endif // CXXMPH_STRINGPIECE_H__

22
deps/cmph/cxxmph/test.cc vendored Normal file
View File

@@ -0,0 +1,22 @@
#include <cstdlib> // For EXIT_SUCCESS, EXIT_FAILURE
#include "test.h"
Suite* global_suite() {
static Suite* gs = suite_create("cxxmph_test_suite");
return gs;
}
TCase* global_tc_core() {
static TCase* gtc = tcase_create("Core");
return gtc;
}
int main (void) {
suite_add_tcase(global_suite(), global_tc_core());
int number_failed;
SRunner *sr = srunner_create (global_suite());
srunner_run_all (sr, CK_NORMAL);
number_failed = srunner_ntests_failed (sr);
srunner_free (sr);
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}

32
deps/cmph/cxxmph/test.h vendored Normal file
View File

@@ -0,0 +1,32 @@
#ifndef __CXXMPH_TEST_H__
#define __CXXMPH_TEST_H__
// Thin wrapper on top of check.h to get rid of boilerplate in tests. Assumes a
// single test suite and test case per file, with each fixture represented by a
// parameter-less boolean function.
//
// The check.h header macro-clashes with c++ libraries so this file needs to be
// included last.
#include <check.h>
#include <stdio.h>
Suite* global_suite();
TCase* global_tc_core();
// Creates a new test case calling boolean_function. Name must be a valid,
// unique c identifier when prefixed with tc_.
#define CXXMPH_CXX_TEST_CASE(name, boolean_function) \
START_TEST(tc_ ## name) \
{ fail_unless(boolean_function()); } END_TEST \
static TestCase global_cxxmph_tc_ ## name(tc_ ## name);
#define CXXMPH_TEST_CASE(name) CXXMPH_CXX_TEST_CASE(name, name)
struct TestCase {
TestCase(void (*f)(int)) {
tcase_add_test(global_tc_core(), f);
}
};
#endif // __CXXMPH_TEST_H__

4
deps/cmph/cxxmph/test_test.cc vendored Normal file
View File

@@ -0,0 +1,4 @@
#include "test.h"
bool tautology() { return true; }
CXXMPH_TEST_CASE(tautology)

82
deps/cmph/cxxmph/trigraph.cc vendored Normal file
View File

@@ -0,0 +1,82 @@
#include <cassert>
#include <limits>
#include <iostream>
#include "trigraph.h"
using std::cerr;
using std::endl;
using std::vector;
namespace {
static const uint32_t kInvalidEdge = std::numeric_limits<uint32_t>::max();
}
namespace cxxmph {
TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges)
: nedges_(0),
edges_(nedges),
next_edge_(nedges),
first_edge_(nvertices, kInvalidEdge),
vertex_degree_(nvertices, 0) { }
TriGraph::~TriGraph() {}
void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
vector<Edge>().swap(next_edge_);
vector<uint32_t>().swap(first_edge_);
vector<uint8_t>().swap(vertex_degree_);
nedges_ = 0;
edges->swap(edges_);
}
void TriGraph::AddEdge(const Edge& edge) {
edges_[nedges_] = edge;
assert(first_edge_.size() > edge[0]);
assert(first_edge_.size() > edge[1]);
assert(first_edge_.size() > edge[0]);
assert(first_edge_.size() > edge[1]);
assert(first_edge_.size() > edge[2]);
assert(next_edge_.size() > nedges_);
next_edge_[nedges_] = Edge(
first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
++vertex_degree_[edge[0]];
++vertex_degree_[edge[1]];
++vertex_degree_[edge[2]];
++nedges_;
}
void TriGraph::RemoveEdge(uint32_t current_edge) {
// cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl;
for (int i = 0; i < 3; ++i) {
uint32_t vertex = edges_[current_edge][i];
uint32_t edge1 = first_edge_[vertex];
uint32_t edge2 = kInvalidEdge;
uint32_t j = 0;
while (edge1 != current_edge && edge1 != kInvalidEdge) {
edge2 = edge1;
if (edges_[edge1][0] == vertex) j = 0;
else if (edges_[edge1][1] == vertex) j = 1;
else j = 2;
edge1 = next_edge_[edge1][j];
}
assert(edge1 != kInvalidEdge);
if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i];
else first_edge_[vertex] = next_edge_[edge1][i];
--vertex_degree_[vertex];
}
}
void TriGraph::DebugGraph() const {
uint32_t i;
for(i = 0; i < edges_.size(); i++){
cerr << i << " " << edges_[i][0] << " " << edges_[i][1] << " " << edges_[i][2]
<< " nexts " << next_edge_[i][0] << " " << next_edge_[i][1] << " " << next_edge_[i][2] << endl;
}
for(i = 0; i < first_edge_.size();i++){
cerr << "first for vertice " <<i << " " << first_edge_[i] << endl;
}
}
} // namespace cxxmph

49
deps/cmph/cxxmph/trigraph.h vendored Normal file
View File

@@ -0,0 +1,49 @@
#ifndef __CXXMPH_TRIGRAPH_H__
#define __CXXMPH_TRIGRAPH_H__
// Build a trigraph using a memory efficient representation.
//
// Prior knowledge of the number of edges and vertices for the graph is
// required. For each vertex, we store how many edges touch it (degree) and the
// index of the first edge in the vector of triples representing the edges.
#include <stdint.h> // for uint32_t and friends
#include <vector>
namespace cxxmph {
class TriGraph {
public:
struct Edge {
Edge() { }
Edge(uint32_t v0, uint32_t v1, uint32_t v2) {
vertices[0] = v0;
vertices[1] = v1;
vertices[2] = v2;
}
uint32_t& operator[](uint8_t v) { return vertices[v]; }
const uint32_t& operator[](uint8_t v) const { return vertices[v]; }
uint32_t vertices[3];
};
TriGraph(uint32_t nedges, uint32_t nvertices);
~TriGraph();
void AddEdge(const Edge& edge);
void RemoveEdge(uint32_t edge_id);
void ExtractEdgesAndClear(std::vector<Edge>* edges);
void DebugGraph() const;
const std::vector<Edge>& edges() const { return edges_; }
const std::vector<uint8_t>& vertex_degree() const { return vertex_degree_; }
const std::vector<uint32_t>& first_edge() const { return first_edge_; }
private:
uint32_t nedges_; // total number of edges
std::vector<Edge> edges_;
std::vector<Edge> next_edge_; // for implementing removal
std::vector<uint32_t> first_edge_; // the first edge for this vertex
std::vector<uint8_t> vertex_degree_; // number of edges for this vertex
};
} // namespace cxxmph
#endif // __CXXMPH_TRIGRAPH_H__

22
deps/cmph/cxxmph/trigraph_test.cc vendored Normal file
View File

@@ -0,0 +1,22 @@
#include <cassert>
#include "trigraph.h"
using cxxmph::TriGraph;
int main(int argc, char** argv) {
TriGraph g(4, 2);
g.AddEdge(TriGraph::Edge(0, 1, 2));
g.AddEdge(TriGraph::Edge(1, 3, 2));
assert(g.vertex_degree()[0] == 1);
assert(g.vertex_degree()[1] == 2);
assert(g.vertex_degree()[2] == 2);
assert(g.vertex_degree()[3] == 1);
g.RemoveEdge(0);
assert(g.vertex_degree()[0] == 0);
assert(g.vertex_degree()[1] == 1);
assert(g.vertex_degree()[2] == 1);
assert(g.vertex_degree()[3] == 1);
std::vector<TriGraph::Edge> edges;
g.ExtractEdgesAndClear(&edges);
}