Add 'deps/cmph/' from commit 'a250982ade093f4eed0552bbdd22dd7b0432007f'
git-subtree-dir: deps/cmph
git-subtree-mainline: 5040f4007b
git-subtree-split: a250982ade
This commit is contained in:
58
deps/cmph/cxxmph/.ycm_extra_conf.py
vendored
Normal file
58
deps/cmph/cxxmph/.ycm_extra_conf.py
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
import os
|
||||
import ycm_core
|
||||
|
||||
# Compiler flags returned by FlagsForFile() for every file in this directory.
flags = [
'-Wall',
'-Wextra',
'-Werror',
'-DNDEBUG',
'-DUSE_CLANG_COMPLETER',
'-std=c++11',
'-x',
'c++',
# The comma after '-isystem' is required: without it Python joins the two
# adjacent literals into the single string '-isystem/usr/lib/c++/v1'.
'-isystem',
'/usr/lib/c++/v1',
'-I',
'.',
]
|
||||
|
||||
def DirectoryOfThisScript():
  """Return the absolute path of the directory that contains this file."""
  script_path = os.path.abspath( __file__ )
  return os.path.dirname( script_path )
|
||||
|
||||
|
||||
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
  """Return a copy of `flags` with include-style paths anchored to a directory.

  Paths given to -isystem, -I, -iquote and --sysroot= are joined onto
  `working_directory`, whether the path is a separate argument or glued to
  the option.  A separate path argument that already starts with '/' is kept
  as is.  Empty flags are dropped.  When `working_directory` is empty the
  flags are returned unchanged (as a new list).
  """
  if not working_directory:
    return list( flags )

  path_options = ( '-isystem', '-I', '-iquote', '--sysroot=' )
  result = []
  # True when the previous flag was a bare path option, i.e. the current
  # flag is the path that belongs to it.
  pending_path = False

  for flag in flags:
    rewritten = flag

    if pending_path:
      pending_path = False
      if not flag.startswith( '/' ):
        rewritten = os.path.join( working_directory, flag )

    for option in path_options:
      if flag == option:
        pending_path = True
        break

      if flag.startswith( option ):
        tail = flag[ len( option ): ]
        rewritten = option + os.path.join( working_directory, tail )
        break

    if rewritten:
      result.append( rewritten )

  return result
|
||||
|
||||
|
||||
def FlagsForFile( filename ):
  """Return the flag dictionary for `filename`.

  The same flags are returned for every file (`filename` is not inspected);
  relative paths in them are resolved against this script's directory.
  """
  script_dir = DirectoryOfThisScript()
  return {
    'flags': MakeRelativePathsInFlagsAbsolute( flags, script_dir ),
    'do_cache': True
  }
|
||||
62
deps/cmph/cxxmph/Makefile.am
vendored
Normal file
62
deps/cmph/cxxmph/Makefile.am
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
# Every program listed in check_PROGRAMS is also run by `make check`.
TESTS = $(check_PROGRAMS)

# Tests that link at most libcxxmph.la.
check_PROGRAMS = \
  seeded_hash_test \
  mph_bits_test \
  hollow_iterator_test \
  mph_index_test \
  trigraph_test

# Tests that link the helper library libcxxmph_test.la and $(CHECK_LIBS).
if USE_LIBCHECK
check_PROGRAMS += \
  test_test \
  map_tester_test \
  mph_map_test \
  dense_hash_map_test \
  string_util_test
check_LTLIBRARIES = libcxxmph_test.la
endif

# bm_index - disabled because of cmph dependency
if USE_BENCHMARKS
noinst_PROGRAMS = bm_map
endif

bin_PROGRAMS = cxxmph

# Public headers, installed under $(includedir)/cxxmph/.
cxxmph_includedir = $(includedir)/cxxmph/
cxxmph_include_HEADERS = \
  mph_bits.h \
  mph_map.h \
  mph_index.h \
  MurmurHash3.h \
  trigraph.h \
  seeded_hash.h \
  stringpiece.h \
  hollow_iterator.h \
  string_util.h

noinst_LTLIBRARIES = libcxxmph_bm.la
lib_LTLIBRARIES = libcxxmph.la

# ---- libraries ----
libcxxmph_la_SOURCES = \
  MurmurHash3.cpp \
  trigraph.cc \
  mph_bits.cc \
  mph_index.cc \
  benchmark.h \
  benchmark.cc \
  string_util.cc
libcxxmph_la_LDFLAGS = -version-info 0:0:0

libcxxmph_test_la_SOURCES = test.h test.cc
libcxxmph_test_la_LIBADD = libcxxmph.la

libcxxmph_bm_la_SOURCES = \
  benchmark.h \
  benchmark.cc \
  bm_common.h \
  bm_common.cc
libcxxmph_bm_la_LIBADD = libcxxmph.la

# ---- command line tool ----
cxxmph_SOURCES = cxxmph.cc
cxxmph_LDADD = libcxxmph.la

# ---- benchmarks ----
# Bad dependency, do not compile by default.
# bm_index_LDADD = libcxxmph_bm.la -lcmph
# bm_index_SOURCES = bm_index.cc

bm_map_SOURCES = bm_map.cc
bm_map_LDADD = libcxxmph_bm.la

# ---- tests built unconditionally ----
seeded_hash_test_SOURCES = seeded_hash_test.cc
seeded_hash_test_LDADD = libcxxmph.la

mph_bits_test_SOURCES = mph_bits_test.cc
mph_bits_test_LDADD = libcxxmph.la

hollow_iterator_test_SOURCES = hollow_iterator_test.cc

mph_index_test_SOURCES = mph_index_test.cc
mph_index_test_LDADD = libcxxmph.la

trigraph_test_SOURCES = trigraph_test.cc
trigraph_test_LDADD = libcxxmph.la

# ---- tests listed under USE_LIBCHECK ----
test_test_SOURCES = test_test.cc
test_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)

mph_map_test_SOURCES = mph_map_test.cc
mph_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)

dense_hash_map_test_SOURCES = dense_hash_map_test.cc
dense_hash_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)

string_util_test_SOURCES = string_util_test.cc
string_util_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS)

map_tester_test_SOURCES = map_tester.h map_tester.cc map_tester_test.cc
map_tester_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS)
|
||||
|
||||
335
deps/cmph/cxxmph/MurmurHash3.cpp
vendored
Normal file
335
deps/cmph/cxxmph/MurmurHash3.cpp
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
//-----------------------------------------------------------------------------
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
// algorithms are optimized for their respective platforms. You can still
|
||||
// compile and run any of them on any platform, but your performance with the
|
||||
// non-native version will be less than optimal.
|
||||
|
||||
#include "MurmurHash3.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Platform-specific functions and macros
|
||||
|
||||
// Microsoft Visual Studio
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#define FORCE_INLINE __forceinline
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ROTL32(x,y) _rotl(x,y)
|
||||
#define ROTL64(x,y) _rotl64(x,y)
|
||||
|
||||
#define BIG_CONSTANT(x) (x)
|
||||
|
||||
// Other compilers
|
||||
|
||||
#else // defined(_MSC_VER)
|
||||
|
||||
#define FORCE_INLINE __attribute__((always_inline))
|
||||
|
||||
// Rotates the 32-bit value x left by r bit positions.
// Both shift counts are reduced modulo 32, so r == 0 (or any multiple of 32)
// never turns into a shift by the full word width, which is undefined.
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned int left = static_cast<unsigned int>(r) & 31u;
  const unsigned int right = (32u - left) & 31u;
  return (x << left) | (x >> right);
}
|
||||
|
||||
// Rotates the 64-bit value x left by r bit positions.
// Both shift counts are reduced modulo 64, so r == 0 (or any multiple of 64)
// never turns into a shift by the full word width, which is undefined.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned int left = static_cast<unsigned int>(r) & 63u;
  const unsigned int right = (64u - left) & 63u;
  return (x << left) | (x >> right);
}
|
||||
|
||||
#define ROTL32(x,y) rotl32(x,y)
|
||||
#define ROTL64(x,y) rotl64(x,y)
|
||||
|
||||
#define BIG_CONSTANT(x) (x##LLU)
|
||||
|
||||
#endif // !defined(_MSC_VER)
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Block read - if your platform needs to do endian-swapping or can only
|
||||
// handle aligned reads, do the conversion here
|
||||
|
||||
// Reads the i-th 32-bit block relative to p (i may be negative).
// This is the single place to add byte swapping or alignment handling.
/*FORCE_INLINE*/ uint32_t getblock ( const uint32_t * p, int i )
{
  const uint32_t * block = p + i;
  return *block;
}
|
||||
|
||||
// Reads the i-th 64-bit block relative to p (i may be negative).
// This is the single place to add byte swapping or alignment handling.
/*FORCE_INLINE*/ uint64_t getblock ( const uint64_t * p, int i )
{
  const uint64_t * block = p + i;
  return *block;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Finalization mix - force all bits of a hash block to avalanche
|
||||
|
||||
// 32-bit finalization mix: alternating xor-shift and multiply steps that
// spread every input bit over the whole word.
/*FORCE_INLINE*/ uint32_t fmix ( uint32_t h )
{
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}
|
||||
|
||||
//----------
|
||||
|
||||
/*FORCE_INLINE*/ uint64_t fmix ( uint64_t k )
|
||||
{
|
||||
k ^= k >> 33;
|
||||
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
|
||||
k ^= k >> 33;
|
||||
k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
|
||||
k ^= k >> 33;
|
||||
|
||||
return k;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void MurmurHash3_x86_32 ( const void * key, int len,
|
||||
uint32_t seed, void * out )
|
||||
{
|
||||
const uint8_t * data = (const uint8_t*)key;
|
||||
const int nblocks = len / 4;
|
||||
|
||||
uint32_t h1 = seed;
|
||||
|
||||
uint32_t c1 = 0xcc9e2d51;
|
||||
uint32_t c2 = 0x1b873593;
|
||||
|
||||
//----------
|
||||
// body
|
||||
|
||||
const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
|
||||
|
||||
for(int i = -nblocks; i; i++)
|
||||
{
|
||||
uint32_t k1 = getblock(blocks,i);
|
||||
|
||||
k1 *= c1;
|
||||
k1 = ROTL32(k1,15);
|
||||
k1 *= c2;
|
||||
|
||||
h1 ^= k1;
|
||||
h1 = ROTL32(h1,13);
|
||||
h1 = h1*5+0xe6546b64;
|
||||
}
|
||||
|
||||
//----------
|
||||
// tail
|
||||
|
||||
const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
|
||||
|
||||
uint32_t k1 = 0;
|
||||
|
||||
switch(len & 3)
|
||||
{
|
||||
case 3: k1 ^= tail[2] << 16;
|
||||
case 2: k1 ^= tail[1] << 8;
|
||||
case 1: k1 ^= tail[0];
|
||||
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
|
||||
};
|
||||
|
||||
//----------
|
||||
// finalization
|
||||
|
||||
h1 ^= len;
|
||||
|
||||
h1 = fmix(h1);
|
||||
|
||||
*(uint32_t*)out = h1;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void MurmurHash3_x86_128 ( const void * key, const int len,
|
||||
uint32_t seed, void * out )
|
||||
{
|
||||
const uint8_t * data = (const uint8_t*)key;
|
||||
const int nblocks = len / 16;
|
||||
|
||||
uint32_t h1 = seed;
|
||||
uint32_t h2 = seed;
|
||||
uint32_t h3 = seed;
|
||||
uint32_t h4 = seed;
|
||||
|
||||
uint32_t c1 = 0x239b961b;
|
||||
uint32_t c2 = 0xab0e9789;
|
||||
uint32_t c3 = 0x38b34ae5;
|
||||
uint32_t c4 = 0xa1e38b93;
|
||||
|
||||
//----------
|
||||
// body
|
||||
|
||||
const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
|
||||
|
||||
for(int i = -nblocks; i; i++)
|
||||
{
|
||||
uint32_t k1 = getblock(blocks,i*4+0);
|
||||
uint32_t k2 = getblock(blocks,i*4+1);
|
||||
uint32_t k3 = getblock(blocks,i*4+2);
|
||||
uint32_t k4 = getblock(blocks,i*4+3);
|
||||
|
||||
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
|
||||
|
||||
h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
|
||||
|
||||
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
|
||||
|
||||
h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
|
||||
|
||||
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
|
||||
|
||||
h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
|
||||
|
||||
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
|
||||
|
||||
h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
|
||||
}
|
||||
|
||||
//----------
|
||||
// tail
|
||||
|
||||
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
||||
|
||||
uint32_t k1 = 0;
|
||||
uint32_t k2 = 0;
|
||||
uint32_t k3 = 0;
|
||||
uint32_t k4 = 0;
|
||||
|
||||
switch(len & 15)
|
||||
{
|
||||
case 15: k4 ^= tail[14] << 16;
|
||||
case 14: k4 ^= tail[13] << 8;
|
||||
case 13: k4 ^= tail[12] << 0;
|
||||
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
|
||||
|
||||
case 12: k3 ^= tail[11] << 24;
|
||||
case 11: k3 ^= tail[10] << 16;
|
||||
case 10: k3 ^= tail[ 9] << 8;
|
||||
case 9: k3 ^= tail[ 8] << 0;
|
||||
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
|
||||
|
||||
case 8: k2 ^= tail[ 7] << 24;
|
||||
case 7: k2 ^= tail[ 6] << 16;
|
||||
case 6: k2 ^= tail[ 5] << 8;
|
||||
case 5: k2 ^= tail[ 4] << 0;
|
||||
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
|
||||
|
||||
case 4: k1 ^= tail[ 3] << 24;
|
||||
case 3: k1 ^= tail[ 2] << 16;
|
||||
case 2: k1 ^= tail[ 1] << 8;
|
||||
case 1: k1 ^= tail[ 0] << 0;
|
||||
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
|
||||
};
|
||||
|
||||
//----------
|
||||
// finalization
|
||||
|
||||
h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
|
||||
|
||||
h1 += h2; h1 += h3; h1 += h4;
|
||||
h2 += h1; h3 += h1; h4 += h1;
|
||||
|
||||
h1 = fmix(h1);
|
||||
h2 = fmix(h2);
|
||||
h3 = fmix(h3);
|
||||
h4 = fmix(h4);
|
||||
|
||||
h1 += h2; h1 += h3; h1 += h4;
|
||||
h2 += h1; h3 += h1; h4 += h1;
|
||||
|
||||
((uint32_t*)out)[0] = h1;
|
||||
((uint32_t*)out)[1] = h2;
|
||||
((uint32_t*)out)[2] = h3;
|
||||
((uint32_t*)out)[3] = h4;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void MurmurHash3_x64_128 ( const void * key, const int len,
|
||||
const uint32_t seed, void * out )
|
||||
{
|
||||
const uint8_t * data = (const uint8_t*)key;
|
||||
const int nblocks = len / 16;
|
||||
|
||||
uint64_t h1 = seed;
|
||||
uint64_t h2 = seed;
|
||||
|
||||
uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
|
||||
uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
|
||||
|
||||
//----------
|
||||
// body
|
||||
|
||||
const uint64_t * blocks = (const uint64_t *)(data);
|
||||
|
||||
for(int i = 0; i < nblocks; i++)
|
||||
{
|
||||
uint64_t k1 = getblock(blocks,i*2+0);
|
||||
uint64_t k2 = getblock(blocks,i*2+1);
|
||||
|
||||
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
||||
|
||||
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
|
||||
|
||||
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
||||
|
||||
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
||||
}
|
||||
|
||||
//----------
|
||||
// tail
|
||||
|
||||
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
||||
|
||||
uint64_t k1 = 0;
|
||||
uint64_t k2 = 0;
|
||||
|
||||
switch(len & 15)
|
||||
{
|
||||
case 15: k2 ^= uint64_t(tail[14]) << 48;
|
||||
case 14: k2 ^= uint64_t(tail[13]) << 40;
|
||||
case 13: k2 ^= uint64_t(tail[12]) << 32;
|
||||
case 12: k2 ^= uint64_t(tail[11]) << 24;
|
||||
case 11: k2 ^= uint64_t(tail[10]) << 16;
|
||||
case 10: k2 ^= uint64_t(tail[ 9]) << 8;
|
||||
case 9: k2 ^= uint64_t(tail[ 8]) << 0;
|
||||
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
||||
|
||||
case 8: k1 ^= uint64_t(tail[ 7]) << 56;
|
||||
case 7: k1 ^= uint64_t(tail[ 6]) << 48;
|
||||
case 6: k1 ^= uint64_t(tail[ 5]) << 40;
|
||||
case 5: k1 ^= uint64_t(tail[ 4]) << 32;
|
||||
case 4: k1 ^= uint64_t(tail[ 3]) << 24;
|
||||
case 3: k1 ^= uint64_t(tail[ 2]) << 16;
|
||||
case 2: k1 ^= uint64_t(tail[ 1]) << 8;
|
||||
case 1: k1 ^= uint64_t(tail[ 0]) << 0;
|
||||
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
||||
};
|
||||
|
||||
//----------
|
||||
// finalization
|
||||
|
||||
h1 ^= len; h2 ^= len;
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
h1 = fmix(h1);
|
||||
h2 = fmix(h2);
|
||||
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
((uint64_t*)out)[0] = h1;
|
||||
((uint64_t*)out)[1] = h2;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
37
deps/cmph/cxxmph/MurmurHash3.h
vendored
Normal file
37
deps/cmph/cxxmph/MurmurHash3.h
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
//-----------------------------------------------------------------------------
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio releases before 2010 (_MSC_VER < 1600) do not ship
// <stdint.h>, so the fixed-width types are declared by hand for them only.
// Newer releases provide the header, and redeclaring uint32_t here would
// conflict with it.

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else  // defined(_MSC_VER) && (_MSC_VER < 1600)

#include <stdint.h>

#endif // !(defined(_MSC_VER) && (_MSC_VER < 1600))

//-----------------------------------------------------------------------------
// Each function hashes the len bytes at key with the given seed and writes
// the result through out: one uint32_t for x86_32, four uint32_t for x86_128
// and two uint64_t for x64_128.

void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );

void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );

void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );

//-----------------------------------------------------------------------------

#endif // _MURMURHASH3_H_
|
||||
142
deps/cmph/cxxmph/benchmark.cc
vendored
Normal file
142
deps/cmph/cxxmph/benchmark.cc
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
#include "benchmark.h"

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <sys/time.h>
#include <sys/resource.h>

#ifdef HAVE_CXA_DEMANGLE
#include <cxxabi.h>
#endif

#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>
|
||||
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::setfill;
|
||||
using std::setw;
|
||||
using std::string;
|
||||
using std::ostringstream;
|
||||
using std::vector;
|
||||
|
||||
namespace {
|
||||
|
||||
/* Subtract the `struct timeval' values X and Y,
   storing the result in RESULT.
   Return 1 if the difference is negative, otherwise 0.
   Neither X nor Y is modified; RESULT may alias either of them. */
int timeval_subtract (
    struct timeval *result, struct timeval *x, struct timeval* y) {
  /* Normalise a private copy of y instead of the caller's value. */
  struct timeval sub = *y;

  /* Perform the carry for the later subtraction by updating the copy. */
  if (x->tv_usec < sub.tv_usec) {
    int nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
    sub.tv_usec -= 1000000 * nsec;
    sub.tv_sec += nsec;
  }
  if (x->tv_usec - sub.tv_usec > 1000000) {
    int nsec = (x->tv_usec - sub.tv_usec) / 1000000;
    sub.tv_usec += 1000000 * nsec;
    sub.tv_sec -= nsec;
  }

  /* Read everything from x before writing result, in case they alias. */
  const int negative = x->tv_sec < sub.tv_sec;
  struct timeval diff;
  diff.tv_sec = x->tv_sec - sub.tv_sec;
  diff.tv_usec = x->tv_usec - sub.tv_usec;
  *result = diff;

  /* Return 1 if result is negative. */
  return negative;
}
|
||||
|
||||
// Formats tv as "<seconds>.<microseconds>": seconds right-aligned in a
// space-padded field of width 3, microseconds zero-padded to width 6.
// C++ iostream is terrible for formatting.
std::string timeval_to_string(timeval tv) {
  std::ostringstream text;
  text << std::setfill(' ') << std::setw(3) << tv.tv_sec << '.'
       << std::setfill('0') << std::setw(6) << tv.tv_usec;
  return text.str();
}
|
||||
|
||||
// Returns getrusage(RUSAGE_SELF) for this process.  On failure the error is
// printed to stderr and the process exits with status -1.
struct rusage getrusage_or_die() {
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage) == 0) return usage;

  std::cerr << "rusage failed: " << strerror(errno) << std::endl;
  exit(-1);
}
|
||||
|
||||
// Returns the current time from gettimeofday().  On failure the error is
// printed to stderr and the process exits with status -1.
struct timeval gettimeofday_or_die() {
  struct timeval now;
  if (gettimeofday(&now, NULL) == 0) return now;

  std::cerr << "gettimeofday failed: " << strerror(errno) << std::endl;
  exit(-1);
}
|
||||
|
||||
#ifdef HAVE_CXA_DEMANGLE
// Returns the human-readable form of a mangled C++ type name, or the input
// unchanged when it cannot be demangled.
std::string demangle(const std::string& name) {
  int status = 0;
  // Passing NULL for the buffer and length makes __cxa_demangle allocate the
  // result with malloc.  A caller-provided buffer must itself come from
  // malloc (it may be realloc'ed), so a stack array is not acceptable.
  char* res = abi::__cxa_demangle(name.c_str(), NULL, NULL, &status);
  if (status != 0 || res == NULL) {
    std::free(res);  // free(NULL) is a no-op
    return name;
  }
  std::string readable(res);
  std::free(res);
  return readable;
}
#else
// No demangler available: the mangled name is used as is.
std::string demangle(const std::string& name) { return name; }
#endif
|
||||
|
||||
|
||||
static vector<cxxmph::Benchmark*> g_benchmarks;
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Adds bm to the list processed by RunAll().  A benchmark registered without
// a name is given the demangle()d name of its dynamic type.
/* static */ void Benchmark::Register(Benchmark* bm) {
  if (bm->name().empty()) bm->set_name(demangle(typeid(*bm).name()));
  g_benchmarks.push_back(bm);
}
|
||||
|
||||
/* static */ void Benchmark::RunAll() {
|
||||
for (uint32_t i = 0; i < g_benchmarks.size(); ++i) {
|
||||
std::auto_ptr<Benchmark> bm(g_benchmarks[i]);
|
||||
if (!bm->SetUp()) {
|
||||
cerr << "Set up phase for benchmark "
|
||||
<< bm->name() << " failed." << endl;
|
||||
continue;
|
||||
}
|
||||
bm->MeasureRun();
|
||||
bm->TearDown();
|
||||
}
|
||||
}
|
||||
|
||||
void Benchmark::MeasureRun() {
|
||||
struct timeval walltime_begin = gettimeofday_or_die();
|
||||
struct rusage begin = getrusage_or_die();
|
||||
Run();
|
||||
struct rusage end = getrusage_or_die();
|
||||
struct timeval walltime_end = gettimeofday_or_die();
|
||||
|
||||
struct timeval utime;
|
||||
timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime);
|
||||
struct timeval stime;
|
||||
timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime);
|
||||
struct timeval wtime;
|
||||
timeval_subtract(&wtime, &walltime_end, &walltime_begin);
|
||||
|
||||
cout << "Benchmark: " << name_ << endl;
|
||||
cout << "CPU User time : " << timeval_to_string(utime) << endl;
|
||||
cout << "CPU System time: " << timeval_to_string(stime) << endl;
|
||||
cout << "Wall clock time: " << timeval_to_string(wtime) << endl;
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
} // namespace cxxmph
|
||||
32
deps/cmph/cxxmph/benchmark.h
vendored
Normal file
32
deps/cmph/cxxmph/benchmark.h
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef __CXXMPH_BENCHMARK_H__
|
||||
#define __CXXMPH_BENCHMARK_H__
|
||||
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Base class for a benchmark.  Subclasses implement Run() and may override
// SetUp()/TearDown().  Instances handed to Register() are executed and then
// deleted by RunAll().
class Benchmark {
 public:
  Benchmark() {}
  virtual ~Benchmark() {}

  // Label printed together with the timings.  Readable through const
  // references as well.
  const std::string& name() const { return name_; }
  void set_name(const std::string& name) { name_ = name; }

  // Queues bm for RunAll().
  static void Register(Benchmark* bm);
  // Runs and destroys every registered benchmark.
  static void RunAll();

 protected:
  // Called before Run(); returning false skips the benchmark.
  virtual bool SetUp() { return true; }
  // The code being timed.
  virtual void Run() = 0;
  // Called after the timed run.
  virtual bool TearDown() { return true; }

 private:
  std::string name_;
  // Times one call of Run() and prints the result.
  void MeasureRun();
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif
|
||||
75
deps/cmph/cxxmph/bm_common.cc
vendored
Normal file
75
deps/cmph/cxxmph/bm_common.cc
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
|
||||
#include "bm_common.h"
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
UrlsBenchmark::~UrlsBenchmark() {}
|
||||
bool UrlsBenchmark::SetUp() {
|
||||
vector<string> urls;
|
||||
std::ifstream f(urls_file_.c_str());
|
||||
if (!f.is_open()) {
|
||||
cerr << "Failed to open urls file " << urls_file_ << endl;
|
||||
return false;
|
||||
}
|
||||
string buffer;
|
||||
while(std::getline(f, buffer)) urls.push_back(buffer);
|
||||
set<string> unique(urls.begin(), urls.end());
|
||||
if (unique.size() != urls.size()) {
|
||||
cerr << "Input file has repeated keys." << endl;
|
||||
return false;
|
||||
}
|
||||
urls.swap(urls_);
|
||||
return true;
|
||||
}
|
||||
|
||||
SearchUrlsBenchmark::~SearchUrlsBenchmark() {}
|
||||
bool SearchUrlsBenchmark::SetUp() {
|
||||
if (!UrlsBenchmark::SetUp()) return false;
|
||||
int32_t miss_ratio_int32 = std::numeric_limits<int32_t>::max() * miss_ratio_;
|
||||
forced_miss_urls_.resize(nsearches_);
|
||||
random_.resize(nsearches_);
|
||||
for (uint32_t i = 0; i < nsearches_; ++i) {
|
||||
random_[i] = urls_[random() % urls_.size()];
|
||||
if (random() < miss_ratio_int32) {
|
||||
forced_miss_urls_[i] = random_[i].as_string() + ".force_miss";
|
||||
random_[i] = forced_miss_urls_[i];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Uint64Benchmark::~Uint64Benchmark() {}
|
||||
bool Uint64Benchmark::SetUp() {
|
||||
set<uint64_t> unique;
|
||||
for (uint32_t i = 0; i < count_; ++i) {
|
||||
uint64_t v;
|
||||
do { v = random(); } while (unique.find(v) != unique.end());
|
||||
values_.push_back(v);
|
||||
unique.insert(v);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
SearchUint64Benchmark::~SearchUint64Benchmark() {}
|
||||
bool SearchUint64Benchmark::SetUp() {
|
||||
if (!Uint64Benchmark::SetUp()) return false;
|
||||
random_.resize(nsearches_);
|
||||
for (uint32_t i = 0; i < nsearches_; ++i) {
|
||||
uint32_t pos = random() % values_.size();
|
||||
random_[i] = values_[pos];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace cxxmph
|
||||
73
deps/cmph/cxxmph/bm_common.h
vendored
Normal file
73
deps/cmph/cxxmph/bm_common.h
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
#ifndef __CXXMPH_BM_COMMON_H__
|
||||
#define __CXXMPH_BM_COMMON_H__
|
||||
|
||||
#include "stringpiece.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map> // std::hash
|
||||
#include "MurmurHash3.h"
|
||||
|
||||
#include "benchmark.h"
|
||||
|
||||
namespace std {
|
||||
template <> struct hash<cxxmph::StringPiece> {
|
||||
uint32_t operator()(const cxxmph::StringPiece& k) const {
|
||||
uint32_t out;
|
||||
MurmurHash3_x86_32(k.data(), k.length(), 1, &out);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Benchmark base whose SetUp() loads the lines of a file into urls_.
class UrlsBenchmark : public Benchmark {
 public:
  UrlsBenchmark(const std::string& urls_file)
      : urls_file_(urls_file) {}
  virtual ~UrlsBenchmark();

 protected:
  // Reads urls_file_ into urls_; fails on an unreadable file or on
  // repeated lines.
  virtual bool SetUp();

  const std::string urls_file_;     // path of the input file
  std::vector<std::string> urls_;   // one entry per line of the file
};
|
||||
|
||||
class SearchUrlsBenchmark : public UrlsBenchmark {
|
||||
public:
|
||||
SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches, float miss_ratio)
|
||||
: UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(miss_ratio) {}
|
||||
virtual ~SearchUrlsBenchmark();
|
||||
protected:
|
||||
virtual bool SetUp();
|
||||
const uint32_t nsearches_;
|
||||
float miss_ratio_;
|
||||
std::vector<std::string> forced_miss_urls_;
|
||||
std::vector<StringPiece> random_;
|
||||
};
|
||||
|
||||
// Benchmark base whose SetUp() fills values_ with count_ distinct random
// numbers.  Run() does nothing by default.
class Uint64Benchmark : public Benchmark {
 public:
  Uint64Benchmark(uint32_t count)
      : count_(count) {}
  virtual ~Uint64Benchmark();

  virtual void Run() {}

 protected:
  virtual bool SetUp();

  const uint32_t count_;           // how many values SetUp() generates
  std::vector<uint64_t> values_;   // the generated values
};
|
||||
|
||||
class SearchUint64Benchmark : public Uint64Benchmark {
|
||||
public:
|
||||
SearchUint64Benchmark(uint32_t count, uint32_t nsearches)
|
||||
: Uint64Benchmark(count), nsearches_(nsearches) { }
|
||||
virtual ~SearchUint64Benchmark();
|
||||
virtual void Run() {};
|
||||
protected:
|
||||
virtual bool SetUp();
|
||||
const uint32_t nsearches_;
|
||||
std::vector<uint64_t> random_;
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_BM_COMMON_H__
|
||||
149
deps/cmph/cxxmph/bm_index.cc
vendored
Normal file
149
deps/cmph/cxxmph/bm_index.cc
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
#include <cmph.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "bm_common.h"
|
||||
#include "stringpiece.h"
|
||||
#include "mph_index.h"
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
using std::string;
|
||||
using std::unordered_map;
|
||||
|
||||
class BM_MPHIndexCreate : public UrlsBenchmark {
|
||||
public:
|
||||
BM_MPHIndexCreate(const std::string& urls_file)
|
||||
: UrlsBenchmark(urls_file) { }
|
||||
protected:
|
||||
virtual void Run() {
|
||||
SimpleMPHIndex<StringPiece> index;
|
||||
index.Reset(urls_.begin(), urls_.end(), urls_.size());
|
||||
}
|
||||
};
|
||||
|
||||
class BM_STLIndexCreate : public UrlsBenchmark {
|
||||
public:
|
||||
BM_STLIndexCreate(const std::string& urls_file)
|
||||
: UrlsBenchmark(urls_file) { }
|
||||
protected:
|
||||
virtual void Run() {
|
||||
unordered_map<StringPiece, uint32_t> index;
|
||||
int idx = 0;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
index.insert(make_pair(*it, idx++));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class BM_MPHIndexSearch : public SearchUrlsBenchmark {
|
||||
public:
|
||||
BM_MPHIndexSearch(const std::string& urls_file, int nsearches)
|
||||
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
|
||||
virtual void Run() {
|
||||
uint64_t sum = 0;
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
auto idx = index_.index(*it);
|
||||
// Collision check to be fair with STL
|
||||
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
|
||||
sum += idx;
|
||||
}
|
||||
}
|
||||
protected:
|
||||
virtual bool SetUp () {
|
||||
if (!SearchUrlsBenchmark::SetUp()) return false;
|
||||
index_.Reset(urls_.begin(), urls_.end(), urls_.size());
|
||||
return true;
|
||||
}
|
||||
SimpleMPHIndex<StringPiece> index_;
|
||||
};
|
||||
|
||||
class BM_CmphIndexSearch : public SearchUrlsBenchmark {
|
||||
public:
|
||||
BM_CmphIndexSearch(const std::string& urls_file, int nsearches)
|
||||
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
|
||||
~BM_CmphIndexSearch() { if (index_) cmph_destroy(index_); }
|
||||
virtual void Run() {
|
||||
uint64_t sum = 0;
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
auto idx = cmph_search(index_, it->data(), it->length());
|
||||
// Collision check to be fair with STL
|
||||
if (strcmp(urls_[idx].c_str(), it->data()) != 0) idx = -1;
|
||||
sum += idx;
|
||||
}
|
||||
}
|
||||
protected:
|
||||
virtual bool SetUp() {
|
||||
if (!SearchUrlsBenchmark::SetUp()) {
|
||||
cerr << "Parent class setup failed." << endl;
|
||||
return false;
|
||||
}
|
||||
FILE* f = fopen(urls_file_.c_str(), "r");
|
||||
if (!f) {
|
||||
cerr << "Faied to open " << urls_file_ << endl;
|
||||
return false;
|
||||
}
|
||||
cmph_io_adapter_t* source = cmph_io_nlfile_adapter(f);
|
||||
if (!source) {
|
||||
cerr << "Faied to create io adapter for " << urls_file_ << endl;
|
||||
return false;
|
||||
}
|
||||
cmph_config_t* config = cmph_config_new(source);
|
||||
if (!config) {
|
||||
cerr << "Failed to create config" << endl;
|
||||
return false;
|
||||
}
|
||||
cmph_config_set_algo(config, CMPH_BDZ);
|
||||
cmph_t* mphf = cmph_new(config);
|
||||
if (!mphf) {
|
||||
cerr << "Failed to create mphf." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
cmph_config_destroy(config);
|
||||
cmph_io_nlfile_adapter_destroy(source);
|
||||
fclose(f);
|
||||
index_ = mphf;
|
||||
return true;
|
||||
}
|
||||
cmph_t* index_;
|
||||
};
|
||||
|
||||
|
||||
class BM_STLIndexSearch : public SearchUrlsBenchmark {
|
||||
public:
|
||||
BM_STLIndexSearch(const std::string& urls_file, int nsearches)
|
||||
: SearchUrlsBenchmark(urls_file, nsearches, 0) { }
|
||||
virtual void Run() {
|
||||
uint64_t sum = 0;
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
auto idx = index_.find(*it);
|
||||
sum += idx->second;
|
||||
}
|
||||
}
|
||||
protected:
|
||||
virtual bool SetUp () {
|
||||
if (!SearchUrlsBenchmark::SetUp()) return false;
|
||||
unordered_map<StringPiece, uint32_t> index;
|
||||
int idx = 0;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
index.insert(make_pair(*it, idx++));
|
||||
}
|
||||
index.swap(index_);
|
||||
return true;
|
||||
}
|
||||
unordered_map<StringPiece, uint32_t> index_;
|
||||
};
|
||||
|
||||
// Registers the index benchmarks against the file "URLS100k" and runs them.
// Each search benchmark performs ten million lookups.  Ownership of the
// benchmark objects passes to Benchmark, which deletes them in RunAll().
int main(int argc, char** argv) {
  const char* const kUrlsFile = "URLS100k";
  const int kSearches = 10*1000*1000;

  Benchmark::Register(new BM_MPHIndexCreate(kUrlsFile));
  Benchmark::Register(new BM_STLIndexCreate(kUrlsFile));
  Benchmark::Register(new BM_MPHIndexSearch(kUrlsFile, kSearches));
  Benchmark::Register(new BM_STLIndexSearch(kUrlsFile, kSearches));
  Benchmark::Register(new BM_CmphIndexSearch(kUrlsFile, kSearches));

  Benchmark::RunAll();
  return 0;
}
|
||||
126
deps/cmph/cxxmph/bm_map.cc
vendored
Normal file
126
deps/cmph/cxxmph/bm_map.cc
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "hopscotch_map.h"
|
||||
|
||||
#include "bm_common.h"
|
||||
#include "mph_map.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
// Another reference benchmark:
|
||||
// http://blog.aggregateknowledge.com/tag/bigmemory/
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Looks k up in mymap and returns a pointer to the mapped value, or NULL
// when the key is absent.  The mapped type must be the same as the key
// type T.
template <class MapType, class T>
const T* myfind(const MapType& mymap, const T& k) {
  const auto pos = mymap.find(k);
  return pos != mymap.end() ? &pos->second : NULL;
}
|
||||
|
||||
template <class MapType>
|
||||
class BM_CreateUrls : public UrlsBenchmark {
|
||||
public:
|
||||
BM_CreateUrls(const string& urls_file) : UrlsBenchmark(urls_file) { }
|
||||
virtual void Run() {
|
||||
MapType mymap;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
mymap[*it] = *it;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class MapType>
|
||||
class BM_SearchUrls : public SearchUrlsBenchmark {
|
||||
public:
|
||||
BM_SearchUrls(const std::string& urls_file, int nsearches, float miss_ratio)
|
||||
: SearchUrlsBenchmark(urls_file, nsearches, miss_ratio) { }
|
||||
virtual ~BM_SearchUrls() {}
|
||||
virtual void Run() {
|
||||
uint32_t total = 1;
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
auto v = myfind(mymap_, *it);
|
||||
if (v) total += v->length();
|
||||
}
|
||||
fprintf(stderr, "Total: %u\n", total);
|
||||
}
|
||||
protected:
|
||||
virtual bool SetUp() {
|
||||
if (!SearchUrlsBenchmark::SetUp()) return false;
|
||||
for (auto it = urls_.begin(); it != urls_.end(); ++it) {
|
||||
mymap_[*it] = *it;
|
||||
}
|
||||
mymap_.rehash(mymap_.bucket_count());
|
||||
fprintf(stderr, "Occupation: %f\n", static_cast<float>(mymap_.size())/mymap_.bucket_count());
|
||||
return true;
|
||||
}
|
||||
MapType mymap_;
|
||||
};
|
||||
|
||||
template <class MapType>
|
||||
class BM_SearchUint64 : public SearchUint64Benchmark {
|
||||
public:
|
||||
BM_SearchUint64() : SearchUint64Benchmark(100000, 10*1000*1000) { }
|
||||
virtual bool SetUp() {
|
||||
if (!SearchUint64Benchmark::SetUp()) return false;
|
||||
for (uint32_t i = 0; i < values_.size(); ++i) {
|
||||
mymap_[values_[i]] = values_[i];
|
||||
}
|
||||
mymap_.rehash(mymap_.bucket_count());
|
||||
// Double check if everything is all right
|
||||
cerr << "Doing double check" << endl;
|
||||
for (uint32_t i = 0; i < values_.size(); ++i) {
|
||||
if (mymap_[values_[i]] != values_[i]) {
|
||||
cerr << "Looking for " << i << " th key value " << values_[i];
|
||||
cerr << " yielded " << mymap_[values_[i]] << endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
virtual void Run() {
|
||||
for (auto it = random_.begin(); it != random_.end(); ++it) {
|
||||
auto v = myfind(mymap_, *it);
|
||||
if (*v != *it) {
|
||||
cerr << "Looked for " << *it << " got " << *v << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MapType mymap_;
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
srandom(4);
|
||||
Benchmark::Register(new BM_CreateUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_CreateUrls<std::unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_CreateUrls<mph_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_CreateUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
Benchmark::Register(new BM_CreateUrls<tsl::hopscotch_map<StringPiece, StringPiece>>("URLS100k"));
|
||||
|
||||
Benchmark::Register(new BM_SearchUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||
Benchmark::Register(new BM_SearchUrls<std::unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||
Benchmark::Register(new BM_SearchUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||
Benchmark::Register(new BM_SearchUrls<tsl::hopscotch_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||
|
||||
Benchmark::Register(new BM_SearchUrls<dense_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||
Benchmark::Register(new BM_SearchUrls<std::unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||
Benchmark::Register(new BM_SearchUrls<sparse_hash_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||
Benchmark::Register(new BM_SearchUrls<tsl::hopscotch_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||
|
||||
Benchmark::Register(new BM_SearchUint64<dense_hash_map<uint64_t, uint64_t>>);
|
||||
Benchmark::Register(new BM_SearchUint64<std::unordered_map<uint64_t, uint64_t>>);
|
||||
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
|
||||
Benchmark::Register(new BM_SearchUint64<sparse_hash_map<uint64_t, uint64_t>>);
|
||||
Benchmark::Register(new BM_SearchUint64<tsl::hopscotch_map<uint64_t, uint64_t>>);
|
||||
Benchmark::RunAll();
|
||||
}
|
||||
74
deps/cmph/cxxmph/cxxmph.cc
vendored
Normal file
74
deps/cmph/cxxmph/cxxmph.cc
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
// Author: davi@google.com (Davi Reis)
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "mph_map.h"
|
||||
#include "config.h"
|
||||
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::getline;
|
||||
using std::ifstream;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
using cxxmph::mph_map;
|
||||
|
||||
// Prints the one-line command synopsis to stderr; prg is the program name
// shown in it.
void usage(const char* prg) {
  cerr << "usage: " << prg;
  cerr << " [-v] [-h] [-V] <keys.txt>" << endl;
}
|
||||
// Prints the command synopsis followed by one help line per supported option,
// all to stderr.
void usage_long(const char* prg) {
  usage(prg);
  const char* const option_help[] = {
    " -h\t print this help message",
    " -V\t print version number and exit",
    " -v\t increase verbosity (may be used multiple times)",
  };
  for (const char* line : option_help) {
    cerr << line << endl;
  }
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
int verbosity = 0;
|
||||
while (1) {
|
||||
char ch = (char)getopt(argc, argv, "hvV");
|
||||
if (ch == -1) break;
|
||||
switch (ch) {
|
||||
case 'h':
|
||||
usage_long(argv[0]);
|
||||
return 0;
|
||||
case 'V':
|
||||
std::cout << VERSION << std::endl;
|
||||
return 0;
|
||||
case 'v':
|
||||
++verbosity;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (optind != argc - 1) {
|
||||
usage(argv[0]);
|
||||
return 1;
|
||||
}
|
||||
vector<string> keys;
|
||||
ifstream f(argv[optind]);
|
||||
if (!f.is_open()) {
|
||||
std::cerr << "Failed to open " << argv[optind] << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
string buffer;
|
||||
while (!getline(f, buffer).eof()) keys.push_back(buffer);
|
||||
for (uint32_t i = 0; i < keys.size(); ++i) string s = keys[i];
|
||||
mph_map<string, string> table;
|
||||
|
||||
for (uint32_t i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i];
|
||||
mph_map<string, string>::const_iterator it = table.begin();
|
||||
mph_map<string, string>::const_iterator end = table.end();
|
||||
for (int i = 0; it != end; ++it, ++i) {
|
||||
cout << i << ": " << it->first
|
||||
<<" -> " << it->second << endl;
|
||||
}
|
||||
}
|
||||
25
deps/cmph/cxxmph/dense_hash_map_test.cc
vendored
Normal file
25
deps/cmph/cxxmph/dense_hash_map_test.cc
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "mph_map.h"
|
||||
#include "map_tester.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
typedef MapTester<dense_hash_map> Tester;
|
||||
|
||||
CXXMPH_CXX_TEST_CASE(empty_find, Tester::empty_find);
|
||||
CXXMPH_CXX_TEST_CASE(empty_erase, Tester::empty_erase);
|
||||
CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
|
||||
CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
|
||||
CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
|
||||
CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
|
||||
CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
|
||||
CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
|
||||
CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
|
||||
CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
|
||||
CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
|
||||
CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);
|
||||
81
deps/cmph/cxxmph/hollow_iterator.h
vendored
Normal file
81
deps/cmph/cxxmph/hollow_iterator.h
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
#ifndef __CXXMPH_HOLLOW_ITERATOR_H__
|
||||
#define __CXXMPH_HOLLOW_ITERATOR_H__
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
using std::vector;
|
||||
|
||||
// Predicate telling whether the slot an iterator refers to is unoccupied.
//
// It holds a container and a presence vector; slot i of the container counts
// as occupied when element i of the presence vector is true. The end iterator
// is never reported as empty.
template <typename container_type>
struct is_empty {
 public:
  is_empty() : c_(nullptr), p_(nullptr) {}
  is_empty(const container_type* c, const vector<bool>* p) : c_(c), p_(p) {}
  bool operator()(typename container_type::const_iterator it) const {
    const bool past_the_end = (it == c_->end());
    if (past_the_end) return false;
    const bool occupied = (*p_)[it - c_->begin()];
    return !occupied;
  }
 private:
  const container_type* c_;  // container the iterators belong to
  const vector<bool>* p_;    // presence flag for each slot of *c_
};
|
||||
|
||||
// Forward iterator adaptor that skips the "empty" positions of an underlying
// sequence.
//
// iterator is the wrapped iterator type; is_empty is a predicate that is
// called with the wrapped iterator and returns true for positions to skip. The
// predicate must return false for the end position, otherwise advancing would
// run past it.
template <typename iterator, typename is_empty>
struct hollow_iterator_base
    : public std::iterator<std::forward_iterator_tag,
                           typename iterator::value_type> {
 public:
  typedef hollow_iterator_base<iterator, is_empty> self_type;
  typedef self_type& self_reference;
  typedef typename iterator::reference reference;
  typedef typename iterator::pointer pointer;
  inline hollow_iterator_base() : it_(), empty_() { }
  // When solid is false, it may refer to an empty position and the iterator
  // moves forward to the first non-empty one.
  inline hollow_iterator_base(iterator it, is_empty empty, bool solid) : it_(it), empty_(empty) {
    if (!solid) advance();
  }
  // Same as above, assumes solid==true.
  inline hollow_iterator_base(iterator it, is_empty empty) : it_(it), empty_(empty) {}
  inline hollow_iterator_base(const self_type& rhs) = default;
  inline self_type& operator=(const self_type& rhs) = default;
  // Converting constructor, e.g. from a wrapper around a mutable iterator to
  // a wrapper around the matching const_iterator.
  template <typename const_iterator>
  hollow_iterator_base(const hollow_iterator_base<const_iterator, is_empty>& rhs)
      : it_(rhs.it_), empty_(rhs.empty_) {}

  reference operator*() const { return *it_; }
  pointer operator->() const { return &(*it_); }
  // Moves to the next non-empty position.
  self_reference operator++() { ++it_; advance(); return *this; }
  // Post-increment: returns the position held before the move.
  self_type operator++(int) { self_type previous(*this); ++(*this); return previous; }

  // The comparisons only look at the wrapped iterators. They are const so
  // that const iterator objects can be compared as well.
  template <typename const_iterator>
  bool operator==(const hollow_iterator_base<const_iterator, is_empty>& rhs) const { return rhs.it_ == it_; }
  template <typename const_iterator>
  bool operator!=(const hollow_iterator_base<const_iterator, is_empty>& rhs) const { return rhs.it_ != it_; }

  // should be friend
  iterator it_;
  is_empty empty_;

 private:
  // Skips forward while the current position is reported as empty.
  void advance() {
    while (empty_(it_)) ++it_;
  }
};
|
||||
|
||||
template <typename container_type, typename iterator>
|
||||
inline auto make_solid(
|
||||
container_type* v, const vector<bool>* p, iterator it) ->
|
||||
hollow_iterator_base<iterator, is_empty<const container_type>> {
|
||||
return hollow_iterator_base<iterator, is_empty<const container_type>>(
|
||||
it, is_empty<const container_type>(v, p));
|
||||
}
|
||||
|
||||
template <typename container_type, typename iterator>
|
||||
inline auto make_hollow(
|
||||
container_type* v, const vector<bool>* p, iterator it) ->
|
||||
hollow_iterator_base<iterator, is_empty<const container_type>> {
|
||||
return hollow_iterator_base<iterator, is_empty<const container_type>>(
|
||||
it, is_empty<const container_type>(v, p), false);
|
||||
}
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_HOLLOW_ITERATOR_H__
|
||||
49
deps/cmph/cxxmph/hollow_iterator_test.cc
vendored
Normal file
49
deps/cmph/cxxmph/hollow_iterator_test.cc
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
using std::vector;
|
||||
#include "hollow_iterator.h"
|
||||
using cxxmph::hollow_iterator_base;
|
||||
using cxxmph::make_hollow;
|
||||
using cxxmph::is_empty;
|
||||
|
||||
int main(int, char**) {
|
||||
vector<int> v;
|
||||
vector<bool> p;
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
v.push_back(i);
|
||||
p.push_back(i % 2 == 0);
|
||||
}
|
||||
auto begin = make_hollow(&v, &p, v.begin());
|
||||
auto end = make_hollow(&v, &p, v.end());
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
if (((*it) % 2) != 0) exit(-1);
|
||||
}
|
||||
const vector<int>* cv(&v);
|
||||
auto cbegin(make_hollow(cv, &p, cv->begin()));
|
||||
auto cend(make_hollow(cv, &p, cv->begin()));
|
||||
for (auto it = cbegin; it != cend; ++it) {
|
||||
if (((*it) % 2) != 0) exit(-1);
|
||||
}
|
||||
const vector<bool>* cp(&p);
|
||||
cbegin = make_hollow(cv, cp, v.begin());
|
||||
cend = make_hollow(cv, cp, cv->end());
|
||||
|
||||
vector<int>::iterator vit1 = v.begin();
|
||||
vector<int>::const_iterator vit2 = v.begin();
|
||||
if (vit1 != vit2) exit(-1);
|
||||
auto it1 = make_hollow(&v, &p, vit1);
|
||||
auto it2 = make_hollow(&v, &p, vit2);
|
||||
if (it1 != it2) exit(-1);
|
||||
|
||||
typedef is_empty<const vector<int>> iev;
|
||||
hollow_iterator_base<vector<int>::iterator, iev> default_constructed;
|
||||
default_constructed = make_hollow(&v, &p, v.begin());
|
||||
return 0;
|
||||
}
|
||||
|
||||
4
deps/cmph/cxxmph/map_tester.cc
vendored
Normal file
4
deps/cmph/cxxmph/map_tester.cc
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "map_tester.h"
|
||||
|
||||
// Intentionally empty: everything in map_tester.h is defined inline.
namespace cxxmph {
}  // namespace cxxmph
|
||||
138
deps/cmph/cxxmph/map_tester.h
vendored
Normal file
138
deps/cmph/cxxmph/map_tester.h
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
#ifndef __CXXMPH_MAP_TEST_HELPER_H__
|
||||
#define __CXXMPH_MAP_TEST_HELPER_H__
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "string_util.h"
|
||||
#include <check.h>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
using namespace cxxmph;
|
||||
using namespace std;
|
||||
|
||||
template <template<typename...> class map_type>
|
||||
struct MapTester {
|
||||
static bool empty_find() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
if (m.find(i) != m.end()) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static bool empty_erase() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
m.erase(i);
|
||||
if (m.size()) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static bool small_insert() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
// Start counting from 1 to not touch default constructed value bugs
|
||||
for (int i = 1; i < 12; ++i) m.insert(make_pair(i, i));
|
||||
return m.size() == 11;
|
||||
}
|
||||
static bool large_insert() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
// Start counting from 1 to not touch default constructed value bugs
|
||||
int nkeys = 12 * 256 * 256;
|
||||
for (int i = 1; i < nkeys; ++i) m.insert(make_pair(i, i));
|
||||
return static_cast<int>(m.size()) == nkeys - 1;
|
||||
}
|
||||
static bool small_search() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
// Start counting from 1 to not touch default constructed value bugs
|
||||
for (int i = 1; i < 12; ++i) m.insert(make_pair(i, i));
|
||||
for (int i = 1; i < 12; ++i) if (m.find(i) == m.end()) return false;
|
||||
return true;
|
||||
}
|
||||
static bool default_search() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
if (m.find(0) != m.end()) return false;
|
||||
for (int i = 1; i < 256; ++i) m.insert(make_pair(i, i));
|
||||
if (m.find(0) != m.end()) return false;
|
||||
for (int i = 0; i < 256; ++i) m.insert(make_pair(i, i));
|
||||
if (m.find(0) == m.end()) return false;
|
||||
return true;
|
||||
}
|
||||
static bool large_search() {
|
||||
int nkeys = 10 * 1000;
|
||||
map_type<int64_t, int64_t> m;
|
||||
for (int i = 0; i < nkeys; ++i) m.insert(make_pair(i, i));
|
||||
for (int i = 0; i < nkeys; ++i) if (m.find(i) == m.end()) return false;
|
||||
return true;
|
||||
}
|
||||
static bool string_search() {
|
||||
int nkeys = 10 * 1000;
|
||||
vector<string> keys;
|
||||
for (int i = 0; i < nkeys; ++i) {
|
||||
keys.push_back(format("%v", i));
|
||||
}
|
||||
map_type<string, int64_t> m;
|
||||
for (int i = 0; i < nkeys; ++i) m.insert(make_pair(keys[i], i));
|
||||
for (int i = 0; i < nkeys; ++i) {
|
||||
auto it = m.find(keys[i]);
|
||||
if (it == m.end()) return false;
|
||||
if (it->second != i) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static bool rehash_zero() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
m.rehash(0);
|
||||
return m.size() == 0;
|
||||
}
|
||||
static bool rehash_size() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
int nkeys = 10 * 1000;
|
||||
for (int i = 0; i < nkeys; ++i) { m.insert(make_pair(i, i)); }
|
||||
m.rehash(nkeys);
|
||||
for (int i = 0; i < nkeys; ++i) { if (m.find(i) == m.end()) return false; }
|
||||
for (int i = nkeys; i < nkeys * 2; ++i) {
|
||||
if (m.find(i) != m.end()) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static bool erase_iterator() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
int nkeys = 10 * 1000;
|
||||
for (int i = 0; i < nkeys; ++i) { m.insert(make_pair(i, i)); }
|
||||
for (int i = 0; i < nkeys; ++i) {
|
||||
if (m.find(i) == m.end()) return false;
|
||||
}
|
||||
for (int i = nkeys - 1; i >= 0; --i) { if (m.find(i) == m.end()) return false; }
|
||||
for (int i = nkeys - 1; i >= 0; --i) {
|
||||
fail_unless(m.find(i) != m.end(), "after erase %d cannot be found", i);
|
||||
fail_unless(m.find(i)->first == i, "after erase key %d cannot be found", i);
|
||||
}
|
||||
for (int i = nkeys - 1; i >= 0; --i) {
|
||||
fail_unless(m.find(i) != m.end(), "after erase %d cannot be found", i);
|
||||
fail_unless(m.find(i)->first == i, "after erase key %d cannot be found", i);
|
||||
if (!(m.find(i)->first == i)) return false;
|
||||
m.erase(m.find(i));
|
||||
if (static_cast<int>(m.size()) != i) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static bool erase_value() {
|
||||
map_type<int64_t, int64_t> m;
|
||||
int nkeys = 10 * 1000;
|
||||
for (int i = 0; i < nkeys; ++i) { m.insert(make_pair(i, i)); }
|
||||
for (int i = nkeys - 1; i >= 0; --i) {
|
||||
fail_unless(m.find(i) != m.end());
|
||||
m.erase(i);
|
||||
if (static_cast<int>(m.size()) != i) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_MAP_TEST_HELPER_H__
|
||||
17
deps/cmph/cxxmph/map_tester_test.cc
vendored
Normal file
17
deps/cmph/cxxmph/map_tester_test.cc
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "map_tester.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
typedef MapTester<std::unordered_map> Tester;
|
||||
|
||||
CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
|
||||
CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
|
||||
CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
|
||||
CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
|
||||
CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
|
||||
CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
|
||||
CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
|
||||
CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
|
||||
CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
|
||||
CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);
|
||||
11
deps/cmph/cxxmph/mph_bits.cc
vendored
Normal file
11
deps/cmph/cxxmph/mph_bits.cc
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
#include "mph_bits.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
const uint8_t dynamic_2bitset::vmask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
dynamic_2bitset::dynamic_2bitset() : size_(0), fill_(false) {}
|
||||
dynamic_2bitset::dynamic_2bitset(uint32_t size, bool fill)
|
||||
: size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {}
|
||||
dynamic_2bitset::~dynamic_2bitset() {}
|
||||
|
||||
}
|
||||
73
deps/cmph/cxxmph/mph_bits.h
vendored
Normal file
73
deps/cmph/cxxmph/mph_bits.h
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
#ifndef __CXXMPH_MPH_BITS_H__
|
||||
#define __CXXMPH_MPH_BITS_H__
|
||||
|
||||
#include <stdint.h> // for uint32_t and friends
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Resizable array of 2-bit values (0..3) packed four per byte.
//
// Slot i lives in byte i / 4, at bit offset 2 * (i % 4). The fill flag given
// at construction decides whether bytes added later by resize() start with
// all bits set or all bits cleared.
class dynamic_2bitset {
 public:
  dynamic_2bitset();
  ~dynamic_2bitset();
  dynamic_2bitset(uint32_t size, bool fill = false);

  const uint8_t operator[](uint32_t i) const { return get(i); }
  // Returns the value stored in slot i.
  const uint8_t get(uint32_t i) const {
    assert(i < size());
    assert((i >> 2) < data_.size());
    return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
  }
  // Stores v (which must be in 0..3) in slot i.
  void set(uint32_t i, uint8_t v) {
    // Validate the arguments before touching the storage.
    assert(v <= 3);
    assert((i >> 2) < data_.size());
    // First raise both bits of the slot, then clear the ones not set in v;
    // the bits of the neighbouring slots are preserved by the mask.
    data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
    data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
    assert(get(i) == v);
  }
  // Changes the number of slots to size.
  void resize(uint32_t size) {
    size_ = size;
    // Round the byte count up, as the constructor does, so that the last
    // slots of a size that is not a multiple of four have storage.
    data_.resize((size >> 2) + ((size & 3) != 0 ? 1 : 0), fill_*ones());
  }
  void swap(dynamic_2bitset& other) {
    std::swap(other.size_, size_);
    std::swap(other.fill_, fill_);
    other.data_.swap(data_);
  }
  void clear() { data_.clear(); size_ = 0; }

  // Number of 2-bit slots.
  uint32_t size() const { return size_; }
  // Per-slot byte masks, indexed by i % 4; defined out of line.
  static const uint8_t vmask[];
  // Read access to the packed bytes.
  const std::vector<uint8_t>& data() const { return data_; }
 private:
  uint32_t size_;
  bool fill_;
  std::vector<uint8_t> data_;
  // Byte with all bits set.
  static uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
};
|
||||
|
||||
// Returns the smallest power of two that is >= k; 0 maps to 1. For k above
// 2^31 the result does not fit in 32 bits and wraps around to 0.
static uint32_t nextpoweroftwo(uint32_t k) {
  if (k == 0) return 1;
  // Copy the highest set bit of k - 1 into every lower position, which gives
  // 2^n - 1; adding one then yields the power of two.
  uint32_t smeared = k - 1;
  const uint32_t word_bits = sizeof(uint32_t) * CHAR_BIT;
  uint32_t shift = 1;
  while (shift < word_bits) {
    smeared |= smeared >> shift;
    shift <<= 1;
  }
  return smeared + 1;
}
|
||||
// Interesting bit tricks that might end up here:
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
|
||||
// Fast a % (k*2^t)
|
||||
// http://www.azillionmonkeys.com/qed/adiv.html
|
||||
// rank and select:
|
||||
// http://vigna.dsi.unimi.it/ftp/papers/Broadword.pdf
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif
|
||||
61
deps/cmph/cxxmph/mph_bits_test.cc
vendored
Normal file
61
deps/cmph/cxxmph/mph_bits_test.cc
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "mph_bits.h"
|
||||
|
||||
using cxxmph::dynamic_2bitset;
|
||||
using cxxmph::nextpoweroftwo;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
dynamic_2bitset small(256, true);
|
||||
for (uint32_t i = 0; i < small.size(); ++i) small.set(i, i % 4);
|
||||
for (uint32_t i = 0; i < small.size(); ++i) {
|
||||
if (small[i] != i % 4) {
|
||||
fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t size = 256;
|
||||
dynamic_2bitset bits(size, true /* fill with ones */);
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
if (bits[i] != 3) {
|
||||
fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 3);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
for (uint32_t i = 0; i < size; ++i) bits.set(i, 0);
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
if (bits[i] != 0) {
|
||||
fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 0);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
for (uint32_t i = 0; i < size; ++i) bits.set(i, i % 4);
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
if (bits[i] != i % 4) {
|
||||
fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, i % 4);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
dynamic_2bitset size_corner1(1);
|
||||
if (size_corner1.size() != 1) exit(-1);
|
||||
dynamic_2bitset size_corner2(2);
|
||||
if (size_corner2.size() != 2) exit(-1);
|
||||
(dynamic_2bitset(4, true)).swap(size_corner2);
|
||||
if (size_corner2.size() != 4) exit(-1);
|
||||
for (uint32_t i = 0; i < size_corner2.size(); ++i) {
|
||||
if (size_corner2[i] != 3) exit(-1);
|
||||
}
|
||||
size_corner2.clear();
|
||||
if (size_corner2.size() != 0) exit(-1);
|
||||
|
||||
dynamic_2bitset empty;
|
||||
empty.clear();
|
||||
dynamic_2bitset large(1000, true);
|
||||
empty.swap(large);
|
||||
|
||||
if (nextpoweroftwo(3) != 4) exit(-1);
|
||||
}
|
||||
|
||||
|
||||
229
deps/cmph/cxxmph/mph_index.cc
vendored
Normal file
229
deps/cmph/cxxmph/mph_index.cc
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
#include "mph_index.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace {

// 2-bit value marking a vertex that has no assignment.
static const uint8_t kUnassigned = 3;
// table used for looking up the number of assigned vertices to a 8-bit integer
// Entry b is the number of 2-bit fields of byte b that differ from
// kUnassigned. The table is read-only, hence const.
static const uint8_t kBdzLookupIndex[] =
{
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
};

}  // anonymous namespace
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Releases the rank table and the g array through clear().
MPHIndex::~MPHIndex() { clear(); }
|
||||
|
||||
// Empties the rank table and the g array. Swapping with empty temporaries
// hands the old contents to the temporaries, which are destroyed right away.
void MPHIndex::clear() {
  std::vector<uint32_t>().swap(ranktable_);
  dynamic_2bitset().swap(g_);
}
|
||||
|
||||
// Computes an order in which the edges of graph can be removed such that every
// removed edge touches a vertex of degree 1 at the time of its removal.
//
// Edges are removed from graph as they are processed. When all m_ edges could
// be ordered the order is swapped into *queue_output and true is returned;
// otherwise *queue_output is left untouched and false is returned.
bool MPHIndex::GenerateQueue(
    TriGraph* graph, vector<uint32_t>* queue_output) {
  const uint32_t edge_count = m_;
  const uint32_t vertex_count = n_;
  uint32_t head = 0;  // next free slot of the queue
  uint32_t tail = 0;  // next queued edge to process
  // Relies on vector<bool> using 1 bit per element
  vector<bool> queued(edge_count + 1, false);
  vector<uint32_t> order(vertex_count, 0);

  // Appends edge to the queue unless it has been queued before.
  auto enqueue_once = [&](uint32_t edge) {
    if (queued[edge]) return;
    order[head++] = edge;
    queued[edge] = true;
  };

  // Seed the queue with every edge touching at least one vertex of degree 1.
  for (uint32_t edge = 0; edge < edge_count; ++edge) {
    const TriGraph::Edge& e = graph->edges()[edge];
    const bool touches_leaf = graph->vertex_degree()[e[0]] == 1 ||
                              graph->vertex_degree()[e[1]] == 1 ||
                              graph->vertex_degree()[e[2]] == 1;
    if (touches_leaf) enqueue_once(edge);
  }

  // Remove queued edges one by one; a removal may drop the degree of a
  // vertex to 1, which makes its remaining edge eligible as well.
  while (tail != head) {
    const uint32_t edge = order[tail++];
    graph->RemoveEdge(edge);
    const TriGraph::Edge& e = graph->edges()[edge];
    for (int corner = 0; corner < 3; ++corner) {
      const uint32_t v = e[corner];
      if (graph->vertex_degree()[v] != 1) continue;
      enqueue_once(graph->first_edge()[v]);
    }
  }

  // Success means every edge made it into the queue.
  const bool all_edges_ordered = (head == edge_count);
  if (all_edges_ordered) order.swap(*queue_output);
  return all_edges_ordered;
}
|
||||
|
||||
void MPHIndex::Assigning(
|
||||
const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
|
||||
uint32_t current_edge = 0;
|
||||
vector<bool> marked_vertices(n_ + 1);
|
||||
dynamic_2bitset(8, true).swap(g_);
|
||||
// Initialize vector of half nibbles with all bits set.
|
||||
dynamic_2bitset g(n_, true /* set bits to 1 */);
|
||||
|
||||
uint32_t nedges = m_; // for legibility
|
||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||
current_edge = queue[i];
|
||||
const TriGraph::Edge& e = edges[current_edge];
|
||||
/*
|
||||
cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||
<< get_2bit_value(g_, e[0]) << " "
|
||||
<< get_2bit_value(g_, e[1]) << " "
|
||||
<< get_2bit_value(g_, e[2]) << " edge " << current_edge << endl;
|
||||
*/
|
||||
if (!marked_vertices[e[0]]) {
|
||||
if (!marked_vertices[e[1]]) {
|
||||
g.set(e[1], kUnassigned);
|
||||
marked_vertices[e[1]] = true;
|
||||
}
|
||||
if (!marked_vertices[e[2]]) {
|
||||
g.set(e[2], kUnassigned);
|
||||
assert(marked_vertices.size() > e[2]);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3);
|
||||
marked_vertices[e[0]] = true;
|
||||
} else if (!marked_vertices[e[1]]) {
|
||||
if (!marked_vertices[e[2]]) {
|
||||
g.set(e[2], kUnassigned);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3);
|
||||
marked_vertices[e[1]] = true;
|
||||
} else {
|
||||
g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
/*
|
||||
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||
<< static_cast<uint32_t>(g[e[0]]) << " "
|
||||
<< static_cast<uint32_t>(g[e[1]]) << " "
|
||||
<< static_cast<uint32_t>(g[e[2]]) << " " << endl;
|
||||
*/
|
||||
}
|
||||
g_.swap(g);
|
||||
}
|
||||
|
||||
void MPHIndex::Ranking() {
|
||||
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
|
||||
uint32_t size = k_ >> 2U;
|
||||
uint32_t ranktable_size = static_cast<uint32_t>(
|
||||
ceil(n_ / static_cast<double>(k_)));
|
||||
vector<uint32_t> ranktable(ranktable_size);
|
||||
uint32_t offset = 0;
|
||||
uint32_t count = 0;
|
||||
uint32_t i = 1;
|
||||
while (1) {
|
||||
if (i == ranktable.size()) break;
|
||||
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
|
||||
for (uint32_t j = 0; j < nbytes; ++j) {
|
||||
count += kBdzLookupIndex[g_.data()[offset + j]];
|
||||
}
|
||||
ranktable[i] = count;
|
||||
offset += nbytes;
|
||||
nbytes_total -= size;
|
||||
++i;
|
||||
}
|
||||
ranktable_.swap(ranktable);
|
||||
}
|
||||
|
||||
uint32_t MPHIndex::Rank(uint32_t vertex) const {
|
||||
if (ranktable_.empty()) return 0;
|
||||
uint32_t index = vertex >> b_;
|
||||
uint32_t base_rank = ranktable_[index];
|
||||
uint32_t beg_idx_v = index << b_;
|
||||
uint32_t beg_idx_b = beg_idx_v >> 2;
|
||||
uint32_t end_idx_b = vertex >> 2;
|
||||
while (beg_idx_b < end_idx_b) {
|
||||
assert(g_.data().size() > beg_idx_b);
|
||||
base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]];
|
||||
}
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
/*
|
||||
cerr << "beg_idx_v: " << beg_idx_v << endl;
|
||||
cerr << "base rank: " << base_rank << endl;
|
||||
cerr << "G: ";
|
||||
for (unsigned int i = 0; i < n_; ++i) {
|
||||
cerr << static_cast<uint32_t>(g_[i]) << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
while (beg_idx_v < vertex) {
|
||||
if (g_[beg_idx_v] != kUnassigned) ++base_rank;
|
||||
++beg_idx_v;
|
||||
}
|
||||
// cerr << "Base rank: " << base_rank << endl;
|
||||
return base_rank;
|
||||
}
|
||||
|
||||
void MPHIndex::swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable) {
|
||||
params.resize(12);
|
||||
uint32_t rounded_c = c_ * 1000 * 1000;
|
||||
std::swap(params[0], rounded_c);
|
||||
c_ = static_cast<double>(rounded_c) / 1000 / 1000;
|
||||
std::swap(params[1], m_);
|
||||
std::swap(params[2], n_);
|
||||
std::swap(params[3], k_);
|
||||
uint32_t uint32_square = static_cast<uint32_t>(square_);
|
||||
std::swap(params[4], uint32_square);
|
||||
square_ = uint32_square;
|
||||
std::swap(params[5], hash_seed_[0]);
|
||||
std::swap(params[6], hash_seed_[1]);
|
||||
std::swap(params[7], hash_seed_[2]);
|
||||
g.swap(g_);
|
||||
ranktable.swap(ranktable_);
|
||||
}
|
||||
|
||||
} // namespace cxxmph
|
||||
270
deps/cmph/cxxmph/mph_index.h
vendored
Normal file
270
deps/cmph/cxxmph/mph_index.h
vendored
Normal file
@@ -0,0 +1,270 @@
|
||||
#ifndef __CXXMPH_MPH_INDEX_H__
|
||||
#define __CXXMPH_MPH_INDEX_H__
|
||||
|
||||
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
||||
//
|
||||
// This is a data structure that given a set of known keys S, will create a
|
||||
// mapping from S to [0..|S|). The class is informed about S through the Reset
|
||||
// method and the mapping is queried by calling index(key).
|
||||
//
|
||||
// This is a pretty uncommon data structure, and if your application has a real
|
||||
// use case for it, chances are that it is a real win. If all you are doing is
|
||||
// a straightforward implementation of an in-memory associative mapping data
|
||||
// structure, then it will probably be slower. Take a look at mph_map.h
|
||||
// instead.
|
||||
//
|
||||
// Thesis presenting this and similar algorithms:
|
||||
// http://homepages.dcc.ufmg.br/~fbotelho/en/talks/thesis2008/thesis.pdf
|
||||
//
|
||||
// Notes:
|
||||
//
|
||||
// Most users can use the SimpleMPHIndex wrapper instead of the MPHIndex which
|
||||
// has confusing template parameters.
|
||||
// This class only implements a minimal perfect hash function, it does not
|
||||
// implement an associative mapping data structure.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <unordered_map> // for std::hash
|
||||
#include <vector>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
#include "seeded_hash.h"
|
||||
#include "mph_bits.h"
|
||||
#include "trigraph.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
class MPHIndex {
|
||||
public:
|
||||
MPHIndex(bool square = false, double c = 1.23, uint8_t b = 7) :
|
||||
c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true) {
|
||||
nest_displacement_[0] = 0;
|
||||
nest_displacement_[1] = r_;
|
||||
nest_displacement_[2] = (r_ << 1);
|
||||
}
|
||||
~MPHIndex();
|
||||
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size);
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
// Get a unique identifier for k, in the range [0;size()). If x wasn't part
|
||||
// of the input in the last Reset call, returns a random value.
|
||||
uint32_t index(const Key& x) const;
|
||||
uint32_t size() const { return m_; }
|
||||
void clear();
|
||||
|
||||
// Advanced users functions. Please avoid unless you know what you are doing.
|
||||
uint32_t perfect_hash_size() const { return n_; }
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t perfect_hash(const Key& x) const; // way faster than the minimal
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t perfect_square(const Key& x) const; // even faster but needs square=true
|
||||
uint32_t minimal_perfect_hash_size() const { return size(); }
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t minimal_perfect_hash(const Key& x) const;
|
||||
|
||||
// Experimental api to use as a serialization building block.
|
||||
// Since this signature exposes some implementation details, expect it to
|
||||
// change.
|
||||
void swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable);
|
||||
|
||||
private:
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
||||
std::vector<TriGraph::Edge>* edges,
|
||||
std::vector<uint32_t>* queue);
|
||||
bool GenerateQueue(TriGraph* graph, std::vector<uint32_t>* queue);
|
||||
void Assigning(const std::vector<TriGraph::Edge>& edges,
|
||||
const std::vector<uint32_t>& queue);
|
||||
void Ranking();
|
||||
uint32_t Rank(uint32_t vertex) const;
|
||||
|
||||
// Algorithm parameters
|
||||
// Perfect hash function density. If this was a 2graph,
|
||||
// then probability of having an acyclic graph would be
|
||||
// sqrt(1-(2/c)^2). See section 3 for details.
|
||||
// http://www.it-c.dk/people/pagh/papers/simpleperf.pdf
|
||||
double c_;
|
||||
uint8_t b_; // Number of bits of the kth index in the ranktable
|
||||
|
||||
// Values used during generation
|
||||
uint32_t m_; // edges count
|
||||
uint32_t n_; // vertex count
|
||||
uint32_t k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
||||
bool square_; // make bit vector size a power of 2
|
||||
|
||||
// Values used during search
|
||||
|
||||
// Partition vertex count, derived from c parameter.
|
||||
uint32_t r_;
|
||||
uint32_t nest_displacement_[3]; // derived from r_
|
||||
|
||||
// The array containing the minimal perfect hash function graph.
|
||||
dynamic_2bitset g_;
|
||||
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
|
||||
// The table used for the rank step of the minimal perfect hash function
|
||||
std::vector<uint32_t> ranktable_;
|
||||
// The selected hash seed triplet for finding the edges in the minimal
|
||||
// perfect hash function graph.
|
||||
uint32_t hash_seed_[3];
|
||||
};
|
||||
|
||||
// Template method needs to go in the header file.
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool MPHIndex::Reset(
|
||||
ForwardIterator begin, ForwardIterator end, uint32_t size) {
|
||||
if (end == begin) {
|
||||
clear();
|
||||
return true;
|
||||
}
|
||||
m_ = size;
|
||||
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
|
||||
if ((r_ % 2) == 0) r_ += 1;
|
||||
// This can be used to speed mods, but increases occupation too much.
|
||||
// Needs to try http://gmplib.org/manual/Integer-Exponentiation.html instead
|
||||
if (square_) r_ = nextpoweroftwo(r_);
|
||||
nest_displacement_[0] = 0;
|
||||
nest_displacement_[1] = r_;
|
||||
nest_displacement_[2] = (r_ << 1);
|
||||
for (uint32_t i = 0; i < sizeof(threebit_mod3); ++i) threebit_mod3[i] = i % 3;
|
||||
|
||||
n_ = 3*r_;
|
||||
k_ = 1U << b_;
|
||||
|
||||
// cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
|
||||
|
||||
int iterations = 1000;
|
||||
std::vector<TriGraph::Edge> edges;
|
||||
std::vector<uint32_t> queue;
|
||||
while (1) {
|
||||
// cerr << "Iterations missing: " << iterations << endl;
|
||||
for (int i = 0; i < 3; ++i) hash_seed_[i] = random();
|
||||
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
|
||||
else --iterations;
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
if (iterations == 0) return false;
|
||||
Assigning(edges, queue);
|
||||
std::vector<TriGraph::Edge>().swap(edges);
|
||||
Ranking();
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool MPHIndex::Mapping(
|
||||
ForwardIterator begin, ForwardIterator end,
|
||||
std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
|
||||
TriGraph graph(n_, m_);
|
||||
for (ForwardIterator it = begin; it != end; ++it) {
|
||||
h128 h = SeededHashFcn().hash128(*it, hash_seed_[0]);
|
||||
// for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
|
||||
uint32_t v0 = h[0] % r_;
|
||||
uint32_t v1 = h[1] % r_ + r_;
|
||||
uint32_t v2 = h[2] % r_ + (r_ << 1);
|
||||
// cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
||||
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
||||
}
|
||||
if (GenerateQueue(&graph, queue)) {
|
||||
graph.ExtractEdgesAndClear(edges);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class SeededHashFcn, class Key>
|
||||
uint32_t MPHIndex::perfect_square(const Key& key) const {
|
||||
h128 h = SeededHashFcn().hash128(key, hash_seed_[0]);
|
||||
h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
|
||||
h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
|
||||
h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
|
||||
assert((h[0]) < g_.size());
|
||||
assert((h[1]) < g_.size());
|
||||
assert((h[2]) < g_.size());
|
||||
uint8_t nest = threebit_mod3[g_[h[0]] + g_[h[1]] + g_[h[2]]];
|
||||
uint32_t vertex = h[nest];
|
||||
return vertex;
|
||||
}
|
||||
|
||||
template <class SeededHashFcn, class Key>
|
||||
uint32_t MPHIndex::perfect_hash(const Key& key) const {
|
||||
if (!g_.size()) return 0;
|
||||
h128 h = SeededHashFcn().hash128(key, hash_seed_[0]);
|
||||
h[0] = (h[0] % r_) + nest_displacement_[0];
|
||||
h[1] = (h[1] % r_) + nest_displacement_[1];
|
||||
h[2] = (h[2] % r_) + nest_displacement_[2];
|
||||
assert((h[0]) < g_.size());
|
||||
assert((h[1]) < g_.size());
|
||||
assert((h[2]) < g_.size());
|
||||
uint8_t nest = threebit_mod3[g_[h[0]] + g_[h[1]] + g_[h[2]]];
|
||||
uint32_t vertex = h[nest];
|
||||
return vertex;
|
||||
}
|
||||
|
||||
template <class SeededHashFcn, class Key>
|
||||
uint32_t MPHIndex::minimal_perfect_hash(const Key& key) const {
|
||||
return Rank(perfect_hash<SeededHashFcn, Key>(key));
|
||||
}
|
||||
|
||||
template <class SeededHashFcn, class Key>
|
||||
uint32_t MPHIndex::index(const Key& key) const {
|
||||
return minimal_perfect_hash<SeededHashFcn, Key>(key);
|
||||
}
|
||||
|
||||
// Simple wrapper around MPHIndex to simplify calling code. Please refer to the
|
||||
// MPHIndex class for documentation.
|
||||
template <class Key, class HashFcn = typename seeded_hash<std::hash<Key>>::hash_function>
|
||||
class SimpleMPHIndex : public MPHIndex {
|
||||
public:
|
||||
SimpleMPHIndex(bool advanced_usage = false) : MPHIndex(advanced_usage) {}
|
||||
template <class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size) {
|
||||
return MPHIndex::Reset<HashFcn>(begin, end, size);
|
||||
}
|
||||
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
|
||||
};
|
||||
|
||||
// The parameters minimal and square trade memory usage for evaluation speed.
|
||||
// Minimal decreases speed and memory usage, and square does the opposite.
|
||||
// Using minimal=true and square=false is the same as SimpleMPHIndex.
|
||||
// Primary template: intentionally empty. Only the <minimal, square>
// combinations specialized below provide index() and size().
template <bool minimal, bool square, class Key, class HashFcn>
struct FlexibleMPHIndex {
};
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
struct FlexibleMPHIndex<true, false, Key, HashFcn>
|
||||
: public SimpleMPHIndex<Key, HashFcn> {
|
||||
FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(false) {}
|
||||
uint32_t index(const Key& key) const {
|
||||
return MPHIndex::minimal_perfect_hash<HashFcn>(key); }
|
||||
uint32_t size() const { return MPHIndex::minimal_perfect_hash_size(); }
|
||||
};
|
||||
template <class Key, class HashFcn>
|
||||
struct FlexibleMPHIndex<false, true, Key, HashFcn>
|
||||
: public SimpleMPHIndex<Key, HashFcn> {
|
||||
FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(true) {}
|
||||
uint32_t index(const Key& key) const {
|
||||
return MPHIndex::perfect_square<HashFcn>(key); }
|
||||
uint32_t size() const { return MPHIndex::perfect_hash_size(); }
|
||||
};
|
||||
template <class Key, class HashFcn>
|
||||
struct FlexibleMPHIndex<false, false, Key, HashFcn>
|
||||
: public SimpleMPHIndex<Key, HashFcn> {
|
||||
FlexibleMPHIndex() : SimpleMPHIndex<Key, HashFcn>(false) {}
|
||||
uint32_t index(const Key& key) const {
|
||||
return MPHIndex::perfect_hash<HashFcn>(key); }
|
||||
uint32_t size() const { return MPHIndex::perfect_hash_size(); }
|
||||
};
|
||||
// From a trade-off perspective this case does not make much sense.
|
||||
// template <class Key, class HashFcn>
|
||||
// class FlexibleMPHIndex<true, true, Key, HashFcn>
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_MPH_INDEX_H__
|
||||
53
deps/cmph/cxxmph/mph_index_test.cc
vendored
Normal file
53
deps/cmph/cxxmph/mph_index_test.cc
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "mph_index.h"
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using namespace cxxmph;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
srand(1);
|
||||
vector<string> keys;
|
||||
keys.push_back("davi");
|
||||
keys.push_back("paulo");
|
||||
keys.push_back("joao");
|
||||
keys.push_back("maria");
|
||||
keys.push_back("bruno");
|
||||
keys.push_back("paula");
|
||||
keys.push_back("diego");
|
||||
keys.push_back("diogo");
|
||||
keys.push_back("algume");
|
||||
|
||||
SimpleMPHIndex<string> mph_index;
|
||||
if (!mph_index.Reset(keys.begin(), keys.end(), keys.size())) { exit(-1); }
|
||||
vector<int> ids;
|
||||
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
|
||||
ids.push_back(mph_index.index(keys[i]));
|
||||
cerr << " " << *(ids.end() - 1);
|
||||
}
|
||||
cerr << endl;
|
||||
sort(ids.begin(), ids.end());
|
||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||
|
||||
// Test serialization
|
||||
vector<uint32_t> params;
|
||||
dynamic_2bitset g;
|
||||
vector<uint32_t> ranktable;
|
||||
mph_index.swap(params, g, ranktable);
|
||||
assert(mph_index.size() == 0);
|
||||
mph_index.swap(params, g, ranktable);
|
||||
assert(mph_index.size() == ids.size());
|
||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||
|
||||
FlexibleMPHIndex<false, true, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> square_empty;
|
||||
auto id = square_empty.index(1);
|
||||
FlexibleMPHIndex<false, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> unordered_empty;
|
||||
id ^= unordered_empty.index(1);
|
||||
FlexibleMPHIndex<true, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> minimal_empty;
|
||||
id ^= minimal_empty.index(1);
|
||||
}
|
||||
272
deps/cmph/cxxmph/mph_map.h
vendored
Normal file
272
deps/cmph/cxxmph/mph_map.h
vendored
Normal file
@@ -0,0 +1,272 @@
|
||||
#ifndef __CXXMPH_MPH_MAP_H__
|
||||
#define __CXXMPH_MPH_MAP_H__
|
||||
// Implementation of the unordered associative mapping interface using a
|
||||
// minimal perfect hash function.
|
||||
//
|
||||
// Since these are header-mostly libraries, make sure you compile your code
|
||||
// with -DNDEBUG and -O3. The code requires a modern C++11 compiler.
|
||||
//
|
||||
// The container comes in 3 flavors, all in the cxxmph namespace and drop-in
|
||||
// replacement for the popular classes with the same names.
|
||||
// * dense_hash_map
|
||||
// -> fast, uses more memory, 2.93 bits per bucket, ~50% occupation
|
||||
// * unordered_map (aliases: hash_map, mph_map)
|
||||
// -> middle ground, uses 2.93 bits per bucket, ~81% occupation
|
||||
// * sparse_hash_map
|
||||
// -> less fast, uses 3.6 bits per bucket, 100% occupation
|
||||
//
|
||||
// Those classes are not necessarily faster than their existing counterparts.
|
||||
// Benchmark your code before using it. The larger the key, the larger the
|
||||
// number of elements inserted, and the bigger the number of failed searches,
|
||||
// the more likely those classes will outperform existing code.
|
||||
//
|
||||
// For large sets of urls (>100k), which are somewhat expensive to compare, I
|
||||
// found those classes to be about 10%-50% faster than unordered_map.
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include <utility> // for std::pair
|
||||
|
||||
#include "string_util.h"
|
||||
#include "hollow_iterator.h"
|
||||
#include "mph_bits.h"
|
||||
#include "mph_index.h"
|
||||
#include "seeded_hash.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
using std::pair;
|
||||
using std::make_pair;
|
||||
using std::vector;
|
||||
|
||||
// Save on repetitive typing.
|
||||
#define MPH_MAP_TMPL_SPEC \
|
||||
template <bool minimal, bool square, \
|
||||
class Key, class Data, class HashFcn, class EqualKey, class Alloc>
|
||||
#define MPH_MAP_CLASS_SPEC mph_map_base<minimal, square, Key, Data, HashFcn, EqualKey, Alloc>
|
||||
#define MPH_MAP_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
|
||||
#define MPH_MAP_INLINE_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC inline typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
|
||||
|
||||
template <bool minimal, bool square, class Key, class Data, class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>, class Alloc = std::allocator<Data> >
|
||||
class mph_map_base {
|
||||
public:
|
||||
typedef Key key_type;
|
||||
typedef Data data_type;
|
||||
typedef pair<Key, Data> value_type;
|
||||
typedef HashFcn hasher;
|
||||
typedef EqualKey key_equal;
|
||||
|
||||
typedef typename vector<value_type>::pointer pointer;
|
||||
typedef typename vector<value_type>::reference reference;
|
||||
typedef typename vector<value_type>::const_reference const_reference;
|
||||
typedef typename vector<value_type>::size_type size_type;
|
||||
typedef typename vector<value_type>::difference_type difference_type;
|
||||
|
||||
typedef is_empty<const vector<value_type>> is_empty_type;
|
||||
typedef hollow_iterator_base<typename vector<value_type>::iterator, is_empty_type> iterator;
|
||||
typedef hollow_iterator_base<typename vector<value_type>::const_iterator, is_empty_type> const_iterator;
|
||||
|
||||
// For making macros simpler.
|
||||
typedef void void_type;
|
||||
typedef bool bool_type;
|
||||
typedef pair<iterator, bool> insert_return_type;
|
||||
|
||||
mph_map_base();
|
||||
~mph_map_base();
|
||||
|
||||
iterator begin();
|
||||
iterator end();
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
size_type size() const;
|
||||
bool empty() const;
|
||||
void clear();
|
||||
void erase(iterator pos);
|
||||
void erase(const key_type& k);
|
||||
pair<iterator, bool> insert(const value_type& x);
|
||||
inline iterator find(const key_type& k);
|
||||
inline const_iterator find(const key_type& k) const;
|
||||
typedef int32_t my_int32_t; // help macros
|
||||
inline int32_t index(const key_type& k) const;
|
||||
data_type& operator[](const key_type &k);
|
||||
const data_type& operator[](const key_type &k) const;
|
||||
|
||||
size_type bucket_count() const { return index_.size() + slack_.bucket_count(); }
|
||||
void rehash(size_type nbuckets /*ignored*/);
|
||||
|
||||
protected: // mimicking STL implementation
|
||||
EqualKey equal_;
|
||||
|
||||
private:
|
||||
template <typename iterator>
|
||||
struct iterator_first : public iterator {
|
||||
iterator_first(iterator it) : iterator(it) { }
|
||||
const typename iterator::value_type::first_type& operator*() {
|
||||
return this->iterator::operator*().first;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename iterator>
|
||||
iterator_first<iterator> make_iterator_first(iterator it) {
|
||||
return iterator_first<iterator>(it);
|
||||
}
|
||||
|
||||
void pack();
|
||||
vector<value_type> values_;
|
||||
vector<bool> present_;
|
||||
FlexibleMPHIndex<minimal, square, Key, typename seeded_hash<HashFcn>::hash_function> index_;
|
||||
// TODO(davi) optimize slack to use hash from index rather than calculate its own
|
||||
typedef std::unordered_map<h128, uint32_t, h128::hash32> slack_type;
|
||||
slack_type slack_;
|
||||
size_type size_;
|
||||
typename seeded_hash<HashFcn>::hash_function hasher128_;
|
||||
};
|
||||
|
||||
// Two maps are equal when they hold the same number of elements and every
// key of lhs is present in rhs with an equal mapped value.
//
// The comparison is done by lookup rather than by walking both containers in
// parallel: the position of an element depends on the hash seeds chosen by
// each map's own pack(), so iteration order differs between maps that hold
// the same content.
MPH_MAP_TMPL_SPEC
bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
  if (lhs.size() != rhs.size()) return false;
  for (auto it = lhs.begin(), it_end = lhs.end(); it != it_end; ++it) {
    auto match = rhs.find(it->first);
    if (match == rhs.end() || !(match->second == it->second)) return false;
  }
  return true;
}
|
||||
|
||||
// Creates an empty map: all containers are cleared, then pack() is invoked
// (it returns immediately while there are no values).
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map_base()
    : size_(0) {
  clear();
  pack();
}

// Nothing to release explicitly; the members clean up after themselves.
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map_base() {
}
|
||||
|
||||
// Inserts x unless its key is already stored.
// Returns (iterator to the element with that key, true if x was inserted).
//
// New elements are appended to values_ and tracked in slack_. A full
// pack() is triggered when the backing vector was at capacity (and holds
// more than 256 elements) or when the key's 128-bit hash is already present
// in slack_.
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
  auto existing = find(x.first);
  if (existing != end()) return make_pair(existing, false);

  // Must be evaluated before push_back changes size and capacity.
  bool should_pack =
      values_.size() > 256 && values_.capacity() == values_.size();

  values_.push_back(x);
  present_.push_back(true);
  ++size_;

  const h128 h = hasher128_.hash128(x.first, 0);
  if (slack_.find(h) == slack_.end()) {
    slack_.insert(std::make_pair(h, values_.size() - 1));
  } else {
    should_pack = true;  // unavoidable pack
  }
  if (should_pack) pack();

  // Look the element up again: pack() may have relocated it.
  return make_pair(find(x.first), true);
}
|
||||
|
||||
// Rebuilds index_ over the keys of all live elements and moves every element
// to the slot the new index assigns to it. Afterwards slack_ is empty.
// Terminates the process with exit(-1) if the index cannot be built.
MPH_MAP_METHOD_DECL(void_type, pack)() {
  // CXXMPH_DEBUGLN("Packing %v values")(values_.size());
  if (values_.empty()) return;
  // Sanity check: the live keys must be pairwise distinct.
  assert(std::unordered_set<key_type>(make_iterator_first(begin()), make_iterator_first(end())).size() == size());
  if (!index_.Reset(make_iterator_first(begin()),
                    make_iterator_first(end()), size_)) {
    exit(-1);
  }

  // Fresh storage sized for the new index, with headroom for later inserts.
  vector<value_type> packed_values(index_.size());
  packed_values.reserve(packed_values.size() * 2);
  vector<bool> packed_present(index_.size(), false);
  packed_present.reserve(packed_present.size() * 2);

  for (iterator it = begin(), last = end(); it != last; ++it) {
    const size_type slot = index_.index(it->first);
    assert(slot < index_.size());
    assert(slot < packed_values.size());
    packed_values[slot] = *it;
    packed_present[slot] = true;
  }
  // fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());

  values_.swap(packed_values);
  present_.swap(packed_present);
  slack_type().swap(slack_);  // drop all slack entries and their storage
}
|
||||
|
||||
// Iterator accessors. begin() is built with make_hollow and end() with
// make_solid, both over values_ / present_.
MPH_MAP_METHOD_DECL(iterator, begin)() {
  return make_hollow(&values_, &present_, values_.begin());
}

MPH_MAP_METHOD_DECL(iterator, end)() {
  return make_solid(&values_, &present_, values_.end());
}

MPH_MAP_METHOD_DECL(const_iterator, begin)() const {
  return make_hollow(&values_, &present_, values_.begin());
}

MPH_MAP_METHOD_DECL(const_iterator, end)() const {
  return make_solid(&values_, &present_, values_.end());
}

// True when the map holds no live elements.
MPH_MAP_METHOD_DECL(bool_type, empty)() const {
  return size_ == 0;
}

// Number of live elements (not the number of slots in values_).
MPH_MAP_METHOD_DECL(size_type, size)() const {
  return size_;
}
|
||||
|
||||
// Removes every element and resets the index and the slack table.
MPH_MAP_METHOD_DECL(void_type, clear)() {
  size_ = 0;
  index_.clear();
  slack_.clear();
  present_.clear();
  values_.clear();
}
|
||||
|
||||
// Erases the element at pos, which must refer to a live element. The slot
// is kept: it is marked absent and its value reset to a default-constructed
// one.
MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
  const auto slot = pos.it_ - values_.begin();
  assert(slot < present_.size());
  assert(present_[slot]);
  present_[slot] = false;
  *pos = value_type();
  --size_;
}
|
||||
// Erases the element with key k, if there is one.
MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
  iterator target = find(k);
  if (target != end()) {
    erase(target);
  }
}
|
||||
|
||||
// Returns an iterator to the element with key k, or end() if there is none.
MPH_MAP_INLINE_METHOD_DECL(const_iterator, find)(const key_type& k) const {
  const auto idx = index(k);
  // Check for "not found" before doing any iterator arithmetic: forming
  // values_.begin() + (-1) is undefined behavior even if never dereferenced.
  if (idx == -1) return end();
  typename vector<value_type>::const_iterator vit = values_.begin() + idx;
  // index() can return the slot of a different key (for a key the index was
  // not built with), so confirm with a real comparison.
  if (!equal_(vit->first, k)) return end();
  return make_solid(&values_, &present_, vit);
}
|
||||
|
||||
// Returns an iterator to the element with key k, or end() if there is none.
MPH_MAP_INLINE_METHOD_DECL(iterator, find)(const key_type& k) {
  const auto idx = index(k);
  // Check for "not found" before doing any iterator arithmetic: forming
  // values_.begin() + (-1) is undefined behavior even if never dereferenced.
  if (idx == -1) return end();
  typename vector<value_type>::iterator vit = values_.begin() + idx;
  // index() can return the slot of a different key (for a key the index was
  // not built with), so confirm with a real comparison.
  if (!equal_(vit->first, k)) return end();
  return make_solid(&values_, &present_, vit);
}
|
||||
|
||||
// Returns the slot in values_ that may hold key k, or -1 if k is certainly
// not stored. A non-negative result still has to be confirmed by comparing
// keys (see find()).
//
// Lookup order: first slack_ (elements inserted since the last pack()), then
// the perfect hash index.
MPH_MAP_INLINE_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
  if (__builtin_expect(!slack_.empty(), 0)) {
    auto sit = slack_.find(hasher128_.hash128(k, 0));
    // erase() marks the slot absent and resets its value but leaves the
    // slack_ entry behind. Without the present_ check, looking up a
    // default-constructed key that was inserted and then erased would land
    // on its reset slot and be reported as found.
    if (sit != slack_.end() && present_[sit->second]) return sit->second;
  }
  if (__builtin_expect(index_.size(), 1)) {
    auto id = index_.index(k);
    if (__builtin_expect(present_[id], true)) return id;
  }
  return -1;
}
|
||||
|
||||
// Returns a reference to the value mapped to k, inserting a
// default-constructed value first if k is not present.
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
  insert_return_type slot = insert(make_pair(k, data_type()));
  return slot.first->second;
}
|
||||
// Repacks the map and then copies values_ and present_ into freshly
// allocated vectors, dropping the spare capacity pack() reserves. The bucket
// count argument is ignored.
MPH_MAP_METHOD_DECL(void_type, rehash)(size_type /*nbuckets*/) {
  pack();
  {
    vector<value_type> exact_values(values_.begin(), values_.end());
    exact_values.swap(values_);
  }
  {
    vector<bool> exact_present(present_.begin(), present_.end());
    exact_present.swap(present_);
  }
  {
    slack_type empty_slack;
    empty_slack.swap(slack_);
  }
}
|
||||
|
||||
#define MPH_MAP_PREAMBLE template <class Key, class Data,\
|
||||
class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>,\
|
||||
class Alloc = std::allocator<Data> >
|
||||
|
||||
// Public container flavors. Each one only fixes the <minimal, square>
// arguments of mph_map_base.

// minimal=false, square=false. mph_map, unordered_map and hash_map are three
// names for the same configuration.
MPH_MAP_PREAMBLE
class mph_map
    : public mph_map_base<false, false, Key, Data, HashFcn, EqualKey, Alloc> {};

MPH_MAP_PREAMBLE
class unordered_map
    : public mph_map_base<false, false, Key, Data, HashFcn, EqualKey, Alloc> {};

MPH_MAP_PREAMBLE
class hash_map
    : public mph_map_base<false, false, Key, Data, HashFcn, EqualKey, Alloc> {};

// minimal=false, square=true.
MPH_MAP_PREAMBLE
class dense_hash_map
    : public mph_map_base<false, true, Key, Data, HashFcn, EqualKey, Alloc> {};

// minimal=true, square=false.
MPH_MAP_PREAMBLE
class sparse_hash_map
    : public mph_map_base<true, false, Key, Data, HashFcn, EqualKey, Alloc> {};
|
||||
|
||||
#undef MPH_MAP_TMPL_SPEC
|
||||
#undef MPH_MAP_CLASS_SPEC
|
||||
#undef MPH_MAP_METHOD_DECL
|
||||
#undef MPH_MAP_INLINE_METHOD_DECL
|
||||
#undef MPH_MAP_PREAMBLE
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_MPH_MAP_H__
|
||||
25
deps/cmph/cxxmph/mph_map_test.cc
vendored
Normal file
25
deps/cmph/cxxmph/mph_map_test.cc
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "mph_map.h"
|
||||
#include "map_tester.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
typedef MapTester<mph_map> Tester;
|
||||
|
||||
CXXMPH_CXX_TEST_CASE(empty_find, Tester::empty_find);
|
||||
CXXMPH_CXX_TEST_CASE(empty_erase, Tester::empty_erase);
|
||||
CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
|
||||
CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
|
||||
CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
|
||||
CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
|
||||
CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
|
||||
CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
|
||||
CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
|
||||
CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
|
||||
CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
|
||||
CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);
|
||||
147
deps/cmph/cxxmph/seeded_hash.h
vendored
Normal file
147
deps/cmph/cxxmph/seeded_hash.h
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
#ifndef __CXXMPH_SEEDED_HASH_H__
|
||||
#define __CXXMPH_SEEDED_HASH_H__
|
||||
|
||||
#include <stdint.h> // for uint32_t and friends
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <unordered_map> // for std::hash
|
||||
|
||||
#include "MurmurHash3.h"
|
||||
#include "stringpiece.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// 128-bit hash value stored as four 32-bit words.
struct h128 {
  // Word access; i must be in [0, 4) (not checked).
  const uint32_t& operator[](uint8_t i) const { return uint32[i]; }
  uint32_t& operator[](uint8_t i) { return uint32[i]; }

  // Reads words {0,1} (second == false) or {2,3} (second == true) as one
  // 64-bit value, the lower-indexed word being the high half.
  uint64_t get64(bool second) const {
    const int base = second ? 2 : 0;
    const uint64_t high = uint32[base];
    const uint64_t low = uint32[base + 1];
    return (high << 32) | low;
  }

  // Inverse of get64: stores v into words {0,1} or {2,3}.
  void set64(uint64_t v, bool second) {
    const int base = second ? 2 : 0;
    uint32[base] = static_cast<uint32_t>(v >> 32);
    uint32[base + 1] = static_cast<uint32_t>(v & 0xFFFFFFFFULL);
  }

  // Word-wise equality.
  bool operator==(const h128 rhs) const {
    for (int i = 0; i < 4; ++i) {
      if (uint32[i] != rhs.uint32[i]) return false;
    }
    return true;
  }

  uint32_t uint32[4];

  // Hash functor for unordered containers keyed by h128: uses the last word.
  struct hash32 {
    uint32_t operator()(const h128& h) const { return h[3]; }
  };
};
|
||||
|
||||
template <class HashFcn>
|
||||
struct seeded_hash_function {
|
||||
template <class Key>
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
uint32_t h;
|
||||
uint32_t h0 = HashFcn()(k);
|
||||
MurmurHash3_x86_32(reinterpret_cast<const void*>(&h0), 4, seed, &h);
|
||||
return h;
|
||||
}
|
||||
template <class Key>
|
||||
h128 hash128(const Key& k, uint32_t seed) const {
|
||||
h128 h;
|
||||
uint32_t h0 = HashFcn()(k);
|
||||
MurmurHash3_x64_128(reinterpret_cast<const void*>(&h0), 4, seed, &h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
struct Murmur3 {
|
||||
template<class Key>
|
||||
uint32_t operator()(const Key& k) const {
|
||||
uint32_t out;
|
||||
MurmurHash3_x86_32(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */, &out);
|
||||
return out;
|
||||
}
|
||||
template <class Key>
|
||||
h128 hash128(const Key& k) const {
|
||||
h128 h;
|
||||
MurmurHash3_x64_128(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */, &h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
struct Murmur3StringPiece {
|
||||
template <class Key>
|
||||
uint32_t operator()(const Key& k) const {
|
||||
StringPiece s(k);
|
||||
uint32_t out;
|
||||
MurmurHash3_x86_32(s.data(), s.length(), 1 /* seed */, &out);
|
||||
return out;
|
||||
}
|
||||
template <class Key>
|
||||
h128 hash128(const Key& k) const {
|
||||
h128 h;
|
||||
StringPiece s(k);
|
||||
MurmurHash3_x64_128(s.data(), s.length(), 1 /* seed */, &h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct seeded_hash_function<Murmur3> {
|
||||
template <class Key>
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
uint32_t out;
|
||||
MurmurHash3_x86_32(reinterpret_cast<const void*>(&k), sizeof(Key), seed, &out);
|
||||
return out;
|
||||
}
|
||||
template <class Key>
|
||||
h128 hash128(const Key& k, uint32_t seed) const {
|
||||
h128 h;
|
||||
MurmurHash3_x64_128(reinterpret_cast<const void*>(&k), sizeof(Key), seed, &h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct seeded_hash_function<Murmur3StringPiece> {
|
||||
template <class Key>
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
StringPiece s(k);
|
||||
uint32_t out;
|
||||
MurmurHash3_x86_32(s.data(), s.length(), seed, &out);
|
||||
return out;
|
||||
}
|
||||
template <class Key>
|
||||
h128 hash128(const Key& k, uint32_t seed) const {
|
||||
h128 h;
|
||||
StringPiece s(k);
|
||||
MurmurHash3_x64_128(s.data(), s.length(), seed, &h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
template <class HashFcn> struct seeded_hash
|
||||
{ typedef seeded_hash_function<HashFcn> hash_function; };
|
||||
// Use Murmur3 instead for all types defined in std::hash, plus
|
||||
// std::string which is commonly extended.
|
||||
template <> struct seeded_hash<std::hash<char*> >
|
||||
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<const char*> >
|
||||
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<std::string> >
|
||||
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<cxxmph::StringPiece> >
|
||||
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
|
||||
|
||||
template <> struct seeded_hash<std::hash<char> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<unsigned char> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<short> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<unsigned short> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<int> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<unsigned int> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<long> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<unsigned long> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<long long> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
template <> struct seeded_hash<std::hash<unsigned long long> >
|
||||
{ typedef seeded_hash_function<Murmur3> hash_function; };
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_SEEDED_HASH_H__
|
||||
59
deps/cmph/cxxmph/seeded_hash_test.cc
vendored
Normal file
59
deps/cmph/cxxmph/seeded_hash_test.cc
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
#include "seeded_hash.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
using std::string;
|
||||
using std::unordered_map;
|
||||
using namespace cxxmph;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
auto hasher = seeded_hash_function<Murmur3StringPiece>();
|
||||
string key1("0");
|
||||
string key2("1");
|
||||
auto h1 = hasher.hash128(key1, 1);
|
||||
auto h2 = hasher.hash128(key2, 1);
|
||||
if (h1 == h2) {
|
||||
fprintf(stderr, "unexpected murmur collision\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
unordered_map<uint64_t, int> g;
|
||||
for (int i = 0; i < 1000; ++i) g[i] = i;
|
||||
for (int i = 0; i < 1000; ++i) if (g[i] != i) exit(-1);
|
||||
|
||||
auto inthasher = seeded_hash_function<std::hash<uint64_t>>();
|
||||
unordered_map<h128, uint64_t, h128::hash32> g2;
|
||||
for (uint64_t i = 0; i < 1000; ++i) {
|
||||
auto h = inthasher.hash128(i, 0);
|
||||
if (g2.find(h) != g2.end()) {
|
||||
std::cerr << "Incorrectly found " << i << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
if (h128::hash32()(h) != h[3]) {
|
||||
cerr << "Buggy hash method." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
auto h2 = inthasher.hash128(i, 0);
|
||||
if (!(h == h2)) {
|
||||
cerr << "h 64(0) " << h.get64(0) << " h 64(1) " << h.get64(1) << endl;
|
||||
cerr << " h2 64(0) " << h2.get64(0) << " h2 64(1) " << h2.get64(1) << endl;
|
||||
cerr << "Broken equality for h128" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
if (h128::hash32()(h) != h128::hash32()(h2)) {
|
||||
cerr << "Inconsistent hash method." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
g2[h] = i;
|
||||
if (g2.find(h) == g2.end()) {
|
||||
std::cerr << "Incorrectly missed " << i << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < 1000; ++i) if (g2[inthasher.hash128(i, 0)] != i) exit(-1);
|
||||
}
|
||||
23
deps/cmph/cxxmph/string_util.cc
vendored
Normal file
23
deps/cmph/cxxmph/string_util.cc
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
#include "string_util.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Base case of the variadic stream_printf recursion: no arguments are left, so
// the remainder of format_string (from offset to the end) is copied verbatim
// to *out. No '%' processing happens here. Always returns true.
//
// offset must not exceed format_string.length(); this is asserted.
bool stream_printf(
    const std::string& format_string, uint32_t offset, std::ostream* out) {
  if (offset == format_string.length()) return true;
  assert(offset < format_string.length());
  // Stray debug prints to stderr were removed here: they polluted the error
  // stream of every caller of format()/infoln().
  *out << format_string.substr(offset);
  return true;
}
|
||||
|
||||
} // namespace cxxmph
|
||||
133
deps/cmph/cxxmph/string_util.h
vendored
Normal file
133
deps/cmph/cxxmph/string_util.h
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
#ifndef __CXXMPH_STRING_UTIL_H__
|
||||
#define __CXXMPH_STRING_UTIL_H__
|
||||
|
||||
// Helper functions for string formatting and terminal output. Should be used
|
||||
// only for debugging and tests, since performance was not a concern.
|
||||
// Implemented using variadic templates because it is cool.
|
||||
//
|
||||
// Adds the extra format %v to the printf formatting language. Uses the method
|
||||
// cxxmph::tostr to implement custom printers and falls back to
// ostream::operator<< otherwise.
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// Logging helpers. Each macro builds a cxxmph::variadic_print bound to the
// current file and line; the result must be invoked with the format arguments,
// e.g. CXXMPH_DEBUGLN("x=%v")(x). DEBUGLN writes to std::cerr, INFOLN to
// std::cout. The type is fully qualified so the macros also work in code that
// has not pulled namespace cxxmph into scope.
#define CXXMPH_DEBUGLN(fmt) \
  ::cxxmph::variadic_print(__FILE__, __LINE__, &std::cerr, fmt)
#define CXXMPH_INFOLN(fmt) \
  ::cxxmph::variadic_print(__FILE__, __LINE__, &std::cout, fmt)
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::vector;
|
||||
|
||||
// Generic printer: writes v to *out using operator<<.
template <class T> void tostr(std::ostream* out, const T& v) {
  *out << v;
}

// uint8_t is printed as a number rather than as a character.
inline void tostr(std::ostream* out, uint8_t v) {
  *out << static_cast<uint32_t>(v);
}

// Prints a vector as "[e0 e1 ... eN]", formatting each element with tostr.
// An empty vector prints as "[]".
template <class V>
inline void tostr(std::ostream* out, const std::vector<V>& v) {
  *out << "[";
  for (size_t i = 0; i < v.size(); ++i) {
    if (i != 0) *out << " ";
    // Print the i-th element (the previous code always printed v[1], which
    // was wrong and read out of bounds for single-element vectors).
    tostr(out, v[i]);
  }
  *out << "]";
}
|
||||
// Prints a pair as "(first,second)", formatting both halves with tostr.
template <class F, class S>
inline void tostr(std::ostream* out, const std::pair<F, S>& v) {
  *out << "(";
  tostr(out, v.first);

  *out << ",";
  tostr(out, v.second);

  *out << ")";
}
|
||||
|
||||
bool stream_printf(
|
||||
const std::string& format_string, uint32_t offset, std::ostream* out);
|
||||
|
||||
// Compile-time switch around snprintf, selected by a boolean "is POD" flag.
// Only the two explicit specializations below are usable.
template <bool ispod> struct pod_snprintf {};

// POD values are forwarded to snprintf as the single variadic argument; the
// snprintf return value is passed through.
template <> struct pod_snprintf<true> {
  template <class T>
  int operator()(char* str, size_t size, const char* format, const T& v) {
    const int written = snprintf(str, size, format, v);
    return written;
  }
};

// Non-POD values cannot go through snprintf: always reports failure (-1) and
// leaves the buffer untouched.
template <> struct pod_snprintf<false> {
  template <class T>
  int operator()(char*, size_t, const char*, const T&) {
    return -1;
  }
};
|
||||
|
||||
template <typename T, typename... Args>
|
||||
bool stream_printf(const std::string& format_string, uint32_t offset,
|
||||
std::ostream* out, const T& value, Args&&... args) {
|
||||
auto txt = format_string.c_str() + offset;
|
||||
while (*txt) {
|
||||
auto b = txt;
|
||||
for (; *txt != '%'; ++txt);
|
||||
if (*(txt + 1) == '%') ++txt;
|
||||
else if (txt == b) break;
|
||||
*out << string(b, txt - b);
|
||||
if (*(txt - 1) == '%') ++txt;
|
||||
}
|
||||
auto fmt = txt + 1;
|
||||
while (*fmt && *fmt != '%') ++fmt;
|
||||
if (strncmp(txt, "%v", 2) == 0) {
|
||||
txt += 2;
|
||||
tostr(out, value);
|
||||
if (txt != fmt) *out << string(txt, fmt);
|
||||
} else {
|
||||
char buf[256]; // Is this enough?
|
||||
auto n = pod_snprintf<std::is_pod<T>::value>()(
|
||||
buf, 256, std::string(txt, fmt).c_str(), value);
|
||||
if (n < 0) return false;
|
||||
*out << buf;
|
||||
}
|
||||
return stream_printf(format_string, fmt - format_string.c_str(), out,
|
||||
std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// printf-like formatting into a std::string (see stream_printf for the
// supported conversions). Returns an empty string if formatting fails.
template <typename... Args>
std::string format(const std::string& format_string, Args&&... args) {
  std::ostringstream buffer;
  const bool ok =
      stream_printf(format_string, 0, &buffer, std::forward<Args>(args)...);
  return ok ? buffer.str() : std::string();
}
|
||||
|
||||
// Formats the arguments like format() and writes the result, followed by a
// newline, to std::cout. The stream_printf status is ignored.
template <typename... Args>
void infoln(const std::string& format_string, Args&&... args) {
  const std::string with_newline = format_string + "\n";
  stream_printf(with_newline, 0, &std::cout, std::forward<Args>(args)...);
}
|
||||
|
||||
// Callable that prints "<file>:<line>: <formatted message>\n" to a stream.
// Built by the CXXMPH_DEBUGLN / CXXMPH_INFOLN macros and then invoked with the
// arguments for format_line.
//
// All constructor arguments are copied into the object. The members used to
// be references, which dangled as soon as the constructor returned (line_ was
// bound to a by-value parameter; file_ and format_line_ to temporaries).
struct variadic_print {
  variadic_print(const std::string& file, uint32_t line, std::ostream* out,
                 const std::string& format_line)
      : file_(file), line_(line), out_(out), format_line_(format_line) {}

  // Prefixes the message with "file:line: ", appends a newline and streams it
  // to out_ via stream_printf.
  template <typename... Args>
  void operator()(Args&&... args) {
    std::string fancy_format = "%v:%d: ";
    fancy_format += format_line_ + "\n";
    stream_printf(fancy_format, 0, out_, file_, line_, std::forward<Args>(args)...);
  }

  const std::string file_;         // source file name
  const uint32_t line_;            // source line number
  std::ostream* out_;              // destination stream (not owned)
  const std::string format_line_;  // user format string, without the prefix
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_STRING_UTIL_H__
|
||||
27
deps/cmph/cxxmph/string_util_test.cc
vendored
Normal file
27
deps/cmph/cxxmph/string_util_test.cc
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
#include "string_util.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace cxxmph;
|
||||
|
||||
bool test_format() {
|
||||
string expected = " %% 4 foo 0x0A bar ";
|
||||
string foo = "foo";
|
||||
string fmt = format(" %%%% %v %v 0x%.2X bar ", 4, foo, 10);
|
||||
fail_unless(fmt == expected, "expected\n-%s-\n got \n-%s-", expected.c_str(), fmt.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool test_infoln() {
|
||||
infoln(string("%s:%d: MY INFO LINE"), __FILE__, __LINE__);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Smoke test for CXXMPH_DEBUGLN: the macro yields a variadic_print object,
// which is invoked immediately (here with no format arguments). The output is
// not checked.
bool test_macro() {
  CXXMPH_DEBUGLN("here i am")();
  return true;
}
|
||||
|
||||
// Register the boolean fixtures defined above as test cases (see test.h).
CXXMPH_TEST_CASE(test_format)
CXXMPH_TEST_CASE(test_infoln)
CXXMPH_TEST_CASE(test_macro)
|
||||
182
deps/cmph/cxxmph/stringpiece.h
vendored
Normal file
182
deps/cmph/cxxmph/stringpiece.h
vendored
Normal file
@@ -0,0 +1,182 @@
|
||||
// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// A string-like object that points to a sized piece of memory.
|
||||
//
|
||||
// Functions or methods may use const StringPiece& parameters to accept either
|
||||
// a "const char*" or a "string" value that will be implicitly converted to
|
||||
// a StringPiece. The implicit conversion means that it is often appropriate
|
||||
// to include this .h file in other files rather than forward-declaring
|
||||
// StringPiece as would be appropriate for most other Google classes.
|
||||
//
|
||||
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
|
||||
// conversions from "const char*" to "string" and back again.
|
||||
//
|
||||
//
|
||||
// Arghh! I wish C++ literals were "string".
|
||||
|
||||
#ifndef CXXMPH_STRINGPIECE_H__
|
||||
#define CXXMPH_STRINGPIECE_H__
|
||||
|
||||
#include <cstddef>
|
||||
#include <string.h>
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Non-owning view of a contiguous run of characters: a pointer plus a length.
// The viewed memory must outlive the StringPiece.
class StringPiece {
 private:
  const char* ptr_;
  int length_;

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.
  StringPiece() : ptr_(nullptr), length_(0) { }
  StringPiece(const char* str)
      : ptr_(str),
        length_(str != nullptr ? static_cast<int>(strlen(str)) : 0) { }
  StringPiece(const std::string& str)
      : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  int length() const { return length_; }
  bool empty() const { return length_ == 0; }

  void clear() {
    ptr_ = nullptr;
    length_ = 0;
  }
  void set(const char* data, int len) {
    ptr_ = data;
    length_ = len;
  }
  // Points at a NUL-terminated string; a null pointer gives length 0.
  void set(const char* str) {
    ptr_ = str;
    length_ = (str != nullptr) ? static_cast<int>(strlen(str)) : 0;
  }
  void set(const void* data, int len) {
    ptr_ = reinterpret_cast<const char*>(data);
    length_ = len;
  }

  // Unchecked element access.
  char operator[](int i) const { return ptr_[i]; }

  // Drop the first n characters (n is not range-checked).
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drop the last n characters (n is not range-checked).
  void remove_suffix(int n) {
    length_ -= n;
  }

  // Byte-wise three-way comparison; on a common prefix the shorter piece
  // orders first. Returns <0, 0 or >0.
  int compare(const StringPiece& x) const {
    const int common = (x.length_ < length_) ? x.length_ : length_;
    const int r = memcmp(ptr_, x.ptr_, common);
    if (r != 0) return r;
    if (length_ < x.length_) return -1;
    if (length_ > x.length_) return +1;
    return 0;
  }

  std::string as_string() const {
    return std::string(data(), size());
  }
  // We also define ToString() here, since many other string-like
  // interfaces name the routine that converts to a C++ string
  // "ToString", and it's confusing to have the method that does that
  // for a StringPiece be called "as_string()".  We also leave the
  // "as_string()" method defined here for existing code.
  std::string ToString() const {
    return std::string(data(), size());
  }

  void CopyToString(std::string* target) const;
  void AppendToString(std::string* target) const;

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    if (length_ < x.length_) return false;
    return memcmp(ptr_, x.ptr_, x.length_) == 0;
  }

  // Does "this" end with "x"
  bool ends_with(const StringPiece& x) const {
    if (length_ < x.length_) return false;
    const char* const tail = ptr_ + (length_ - x.length_);
    return memcmp(tail, x.ptr_, x.length_) == 0;
  }

  // standard STL container boilerplate
  typedef char value_type;
  typedef const char* pointer;
  typedef const char& reference;
  typedef const char& const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  static const size_type npos;
  typedef const char* const_iterator;
  typedef const char* iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;
  iterator begin() const { return ptr_; }
  iterator end() const { return ptr_ + length_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(end());
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(begin());
  }
  // STLS says return size_type, but Google says return int
  int max_size() const { return length_; }
  int capacity() const { return length_; }

  int copy(char* buf, size_type n, size_type pos = 0) const;

  int find(const StringPiece& s, size_type pos = 0) const;
  int find(char c, size_type pos = 0) const;
  int rfind(const StringPiece& s, size_type pos = npos) const;
  int rfind(char c, size_type pos = npos) const;

  StringPiece substr(size_type pos, size_type n = npos) const;
};
|
||||
|
||||
inline bool operator==(const StringPiece& x, const StringPiece& y) {
|
||||
return x.length() == y.length() && memcmp(x.data(), y.data(), x.length()) == 0;
|
||||
}
|
||||
|
||||
inline bool operator!=(const StringPiece& x, const StringPiece& y) {
|
||||
return !(x == y);
|
||||
}
|
||||
|
||||
inline bool operator<(const StringPiece& x, const StringPiece& y) {
|
||||
const int r = memcmp(x.data(), y.data(),
|
||||
std::min(x.size(), y.size()));
|
||||
return ((r < 0) || ((r == 0) && (x.size() < y.size())));
|
||||
}
|
||||
|
||||
inline bool operator>(const StringPiece& x, const StringPiece& y) {
|
||||
return y < x;
|
||||
}
|
||||
|
||||
inline bool operator<=(const StringPiece& x, const StringPiece& y) {
|
||||
return !(x > y);
|
||||
}
|
||||
|
||||
inline bool operator>=(const StringPiece& x, StringPiece& y) {
|
||||
return !(x < y);
|
||||
}
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
// allow StringPiece to be logged
|
||||
// Streams a copy of the viewed characters (made with as_string()) and returns
// the stream.
inline std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece) {
  return o << piece.as_string();
}
|
||||
|
||||
#endif // CXXMPH_STRINGPIECE_H__
|
||||
22
deps/cmph/cxxmph/test.cc
vendored
Normal file
22
deps/cmph/cxxmph/test.cc
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#include <cstdlib> // For EXIT_SUCCESS, EXIT_FAILURE
|
||||
|
||||
#include "test.h"
|
||||
|
||||
// Returns the single check.h Suite shared by the test binary; it is created
// on first call.
Suite* global_suite() {
  static Suite* const instance = suite_create("cxxmph_test_suite");
  return instance;
}
|
||||
// Returns the single check.h TCase ("Core") that every fixture is added to;
// it is created on first call.
TCase* global_tc_core() {
  static TCase* const instance = tcase_create("Core");
  return instance;
}
|
||||
|
||||
int main (void) {
|
||||
suite_add_tcase(global_suite(), global_tc_core());
|
||||
int number_failed;
|
||||
SRunner *sr = srunner_create (global_suite());
|
||||
srunner_run_all (sr, CK_NORMAL);
|
||||
number_failed = srunner_ntests_failed (sr);
|
||||
srunner_free (sr);
|
||||
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
32
deps/cmph/cxxmph/test.h
vendored
Normal file
32
deps/cmph/cxxmph/test.h
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef __CXXMPH_TEST_H__
|
||||
#define __CXXMPH_TEST_H__
|
||||
|
||||
// Thin wrapper on top of check.h to get rid of boilerplate in tests. Assumes a
|
||||
// single test suite and test case per file, with each fixture represented by a
|
||||
// parameter-less boolean function.
|
||||
//
|
||||
// The check.h header macro-clashes with c++ libraries so this file needs to be
|
||||
// included last.
|
||||
|
||||
#include <check.h>
|
||||
#include <stdio.h>
|
||||
|
||||
Suite* global_suite();
|
||||
TCase* global_tc_core();
|
||||
|
||||
// Creates a new test case calling boolean_function. Name must be a valid,
|
||||
// unique c identifier when prefixed with tc_.
|
||||
// Expands to a check.h test function tc_<name> that fails unless
// boolean_function() returns true, plus a static TestCase object named
// global_cxxmph_tc_<name> whose constructor adds tc_<name> to the TCase
// returned by global_tc_core().
#define CXXMPH_CXX_TEST_CASE(name, boolean_function) \
START_TEST(tc_ ## name) \
{ fail_unless(boolean_function()); } END_TEST \
static TestCase global_cxxmph_tc_ ## name(tc_ ## name);

// Shorthand for the common case where the test case is named after the
// boolean function it calls.
#define CXXMPH_TEST_CASE(name) CXXMPH_CXX_TEST_CASE(name, name)
|
||||
|
||||
// Registration helper: constructing a TestCase adds the check.h test function
// f to the TCase returned by global_tc_core(). The CXXMPH_CXX_TEST_CASE macro
// creates one static instance per test.
struct TestCase {
  // f is passed to tcase_add_test as written; the parameter name is kept
  // as-is in case check.h derives the reported test name from it
  // (NOTE(review): confirm against the check.h version in use).
  TestCase(void (*f)(int)) {
    tcase_add_test(global_tc_core(), f);
  }
};
|
||||
|
||||
#endif // __CXXMPH_TEST_H__
|
||||
4
deps/cmph/cxxmph/test_test.cc
vendored
Normal file
4
deps/cmph/cxxmph/test_test.cc
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "test.h"
|
||||
|
||||
// Trivial always-passing fixture: checks that the CXXMPH_TEST_CASE
// registration machinery itself works.
bool tautology() { return true; }
CXXMPH_TEST_CASE(tautology)
|
||||
82
deps/cmph/cxxmph/trigraph.cc
vendored
Normal file
82
deps/cmph/cxxmph/trigraph.cc
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
|
||||
#include "trigraph.h"
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
using std::vector;
|
||||
|
||||
namespace {
// Sentinel edge index (the largest uint32_t) meaning "no edge".
constexpr uint32_t kInvalidEdge = std::numeric_limits<uint32_t>::max();
}  // namespace
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Builds an empty graph with room for nedges edges over nvertices vertices.
// Storage for all edges is allocated up front; every vertex starts with an
// empty edge list (kInvalidEdge) and degree 0.
TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges)
      : nedges_(0),  // no edges added yet
        edges_(nedges),
        next_edge_(nedges),
        first_edge_(nvertices, kInvalidEdge),
        vertex_degree_(nvertices, 0) { }
|
||||
// Nothing to release explicitly: the member vectors clean up after themselves.
TriGraph::~TriGraph() {}
|
||||
|
||||
// Hands the edge array over to *edges (by swapping) and releases the
// bookkeeping vectors. After the call the edge count is zero and edges_ holds
// whatever *edges contained before.
void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
  nedges_ = 0;
  edges_.swap(*edges);

  // Swapping with an empty temporary frees the storage, not just the elements.
  vector<uint8_t>().swap(vertex_degree_);
  vector<uint32_t>().swap(first_edge_);
  vector<Edge>().swap(next_edge_);
}
|
||||
// Appends `edge` as edge number nedges_ and pushes it onto the front of the
// edge list of each of its three vertices.
//
// Preconditions (asserted): fewer edges have been added than were reserved in
// the constructor, and every vertex id is below the vertex count.
void TriGraph::AddEdge(const Edge& edge) {
  // Check capacity and vertex ids before touching any storage. The previous
  // code wrote edges_[nedges_] first and repeated two of the asserts.
  assert(edges_.size() > nedges_);
  assert(next_edge_.size() > nedges_);
  assert(first_edge_.size() > edge[0]);
  assert(first_edge_.size() > edge[1]);
  assert(first_edge_.size() > edge[2]);

  edges_[nedges_] = edge;
  // The new edge's successors are the current list heads of its vertices...
  next_edge_[nedges_] = Edge(
      first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
  // ...and the new edge becomes the head of all three lists.
  first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
  ++vertex_degree_[edge[0]];
  ++vertex_degree_[edge[1]];
  ++vertex_degree_[edge[2]];
  ++nedges_;
}
|
||||
|
||||
void TriGraph::RemoveEdge(uint32_t current_edge) {
|
||||
// cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
uint32_t vertex = edges_[current_edge][i];
|
||||
uint32_t edge1 = first_edge_[vertex];
|
||||
uint32_t edge2 = kInvalidEdge;
|
||||
uint32_t j = 0;
|
||||
while (edge1 != current_edge && edge1 != kInvalidEdge) {
|
||||
edge2 = edge1;
|
||||
if (edges_[edge1][0] == vertex) j = 0;
|
||||
else if (edges_[edge1][1] == vertex) j = 1;
|
||||
else j = 2;
|
||||
edge1 = next_edge_[edge1][j];
|
||||
}
|
||||
assert(edge1 != kInvalidEdge);
|
||||
if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i];
|
||||
else first_edge_[vertex] = next_edge_[edge1][i];
|
||||
--vertex_degree_[vertex];
|
||||
}
|
||||
}
|
||||
|
||||
void TriGraph::DebugGraph() const {
|
||||
uint32_t i;
|
||||
for(i = 0; i < edges_.size(); i++){
|
||||
cerr << i << " " << edges_[i][0] << " " << edges_[i][1] << " " << edges_[i][2]
|
||||
<< " nexts " << next_edge_[i][0] << " " << next_edge_[i][1] << " " << next_edge_[i][2] << endl;
|
||||
}
|
||||
for(i = 0; i < first_edge_.size();i++){
|
||||
cerr << "first for vertice " <<i << " " << first_edge_[i] << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace cxxmph
|
||||
49
deps/cmph/cxxmph/trigraph.h
vendored
Normal file
49
deps/cmph/cxxmph/trigraph.h
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
#ifndef __CXXMPH_TRIGRAPH_H__
|
||||
#define __CXXMPH_TRIGRAPH_H__
|
||||
// Build a trigraph using a memory efficient representation.
|
||||
//
|
||||
// Prior knowledge of the number of edges and vertices for the graph is
|
||||
// required. For each vertex, we store how many edges touch it (degree) and the
|
||||
// index of the first edge in the vector of triples representing the edges.
|
||||
|
||||
#include <stdint.h> // for uint32_t and friends
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Graph whose edges each connect three vertices, stored in preallocated
// vectors. Each vertex keeps a singly linked list of the edges touching it
// (first_edge_ / next_edge_) and an 8-bit degree counter.
class TriGraph {
 public:
  // An edge is a triple of vertex ids. The same struct is reused to hold the
  // three "next edge" links of an edge, one per vertex slot.
  struct Edge {
    // Leaves the vertices uninitialized.
    Edge() { }
    Edge(uint32_t v0, uint32_t v1, uint32_t v2) {
      vertices[0] = v0;
      vertices[1] = v1;
      vertices[2] = v2;
    }
    // Unchecked access to slot v (0..2).
    uint32_t& operator[](uint8_t v) { return vertices[v]; }
    const uint32_t& operator[](uint8_t v) const { return vertices[v]; }
    uint32_t vertices[3];
  };

  // Note the argument order: vertex count first, then edge capacity. The
  // parameter names now match the definition in trigraph.cc; they used to be
  // swapped here.
  TriGraph(uint32_t nvertices, uint32_t nedges);
  ~TriGraph();

  // Appends an edge; at most `nedges` edges may be added.
  void AddEdge(const Edge& edge);
  // Unlinks the edge with the given index from its vertices' lists.
  void RemoveEdge(uint32_t edge_id);
  // Swaps the edge array into *edges and releases all other storage.
  void ExtractEdgesAndClear(std::vector<Edge>* edges);
  // Prints the internal state to stderr.
  void DebugGraph() const;

  const std::vector<Edge>& edges() const { return edges_; }
  const std::vector<uint8_t>& vertex_degree() const { return vertex_degree_; }
  const std::vector<uint32_t>& first_edge() const { return first_edge_; }

 private:
  uint32_t nedges_;  // total number of edges
  std::vector<Edge> edges_;
  std::vector<Edge> next_edge_;  // for implementing removal
  std::vector<uint32_t> first_edge_;  // the first edge for this vertex
  std::vector<uint8_t> vertex_degree_;  // number of edges for this vertex
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_TRIGRAPH_H__
|
||||
22
deps/cmph/cxxmph/trigraph_test.cc
vendored
Normal file
22
deps/cmph/cxxmph/trigraph_test.cc
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#include <cassert>
|
||||
|
||||
#include "trigraph.h"
|
||||
|
||||
using cxxmph::TriGraph;
|
||||
|
||||
// Builds a graph with 4 vertices and 2 edges, then checks the vertex degrees
// before and after removing the first edge. The checks are plain asserts, so
// they are compiled out when NDEBUG is defined.
int main(int argc, char** argv) {
  TriGraph g(4, 2);
  g.AddEdge(TriGraph::Edge(0, 1, 2));
  g.AddEdge(TriGraph::Edge(1, 3, 2));

  // Vertices 1 and 2 are shared by both edges.
  assert(g.vertex_degree()[0] == 1 && "vertex 0");
  assert(g.vertex_degree()[1] == 2 && "vertex 1");
  assert(g.vertex_degree()[2] == 2 && "vertex 2");
  assert(g.vertex_degree()[3] == 1 && "vertex 3");

  // Removing edge 0 lowers the degree of vertices 0, 1 and 2 by one.
  g.RemoveEdge(0);
  assert(g.vertex_degree()[0] == 0 && "vertex 0");
  assert(g.vertex_degree()[1] == 1 && "vertex 1");
  assert(g.vertex_degree()[2] == 1 && "vertex 2");
  assert(g.vertex_degree()[3] == 1 && "vertex 3");

  std::vector<TriGraph::Edge> extracted;
  g.ExtractEdgesAndClear(&extracted);
}
|
||||
Reference in New Issue
Block a user