Optimized slack_type.

This commit is contained in:
Davi Reis 2012-03-21 10:20:30 -03:00
parent b8610f52e1
commit 1bb2d6a4dc
7 changed files with 109 additions and 15 deletions

View File

@ -1,5 +1,5 @@
TESTS = $(check_PROGRAMS)
check_PROGRAMS = mph_bits_test hollow_iterator_test mph_map_test mph_index_test trigraph_test
check_PROGRAMS = seeded_hash_test mph_bits_test hollow_iterator_test mph_map_test mph_index_test trigraph_test
noinst_PROGRAMS = bm_index bm_map
bin_PROGRAMS = cxxmph
lib_LTLIBRARIES = libcxxmph.la
@ -27,9 +27,10 @@ cxxmph_LDADD = libcxxmph.la
cxxmph_SOURCES = cxxmph.cc
hollow_iterator_test_SOURCES = hollow_iterator_test.cc
seeded_hash_test_SOURCES = seeded_hash_test.cc
seeded_hash_test_LDADD = libcxxmph.la
mph_bits_test_SOURCES = mph_bits_test.cc
mph_bits_test_LDADD = libcxxmph.la
mph_bits_test_LDADD = libcxxmph.la

View File

@ -8,6 +8,9 @@ using cxxmph::mph_map;
using std::string;
using std::unordered_map;
// Another reference benchmark:
// http://blog.aggregateknowledge.com/tag/bigmemory/
namespace cxxmph {
template<class MapType, class T>

View File

@ -63,6 +63,23 @@ static uint32_t nextpoweroftwo(uint32_t k) {
return k+1;
}
// bitcount<n>::value is the number of set bits among the low 8 bits of n.
// The recursion tests one bit per step, starting at mask 1 << 7 and shifting
// the mask right until it reaches 0 (handled by the specialization below).
template <int n, int mask = (1 << 7)> struct bitcount {
  enum { value = (n & mask ? 1 : 0) + bitcount<n, (mask >> 1)>::value };
};
template <int n> struct bitcount<n, 0> { enum { value = 0 }; };

// Lookup table where table[i] == bitcount<i>::value for every i in
// [0, size). Because bitcount only inspects 8 bits, entries are meaningful
// for size <= 256.
//
// The entries live in a plain array. An earlier layout chained one byte per
// nested member object and indexed them with pointer arithmetic starting at
// a single member, which is undefined behaviour and depends on the compiler
// adding no padding between nested objects.
//
// The second template parameter is the recursion cursor used while filling
// the table; callers should leave it at its default.
template <int size, int index = size>
class CompileTimeRankTable {
 public:
  CompileTimeRankTable() : table_() { fill(table_); }
  // Returns the bit count of i. i must be in [0, size); it is not checked.
  int operator[] (int i) const { return table_[i]; }
 private:
  // Every instantiation calls fill() on the instantiation one index below.
  template <int, int> friend class CompileTimeRankTable;
  // Writes entries index-1, index-2, ..., 0 of `table`.
  static void fill(unsigned char* table) {
    table[index - 1] = bitcount<index - 1>::value;
    CompileTimeRankTable<size, index - 1>::fill(table);
  }
  unsigned char table_[size];
};
// Recursion terminator: nothing left to fill.
template <int size> class CompileTimeRankTable<size, 0> {
 private:
  template <int, int> friend class CompileTimeRankTable;
  static void fill(unsigned char*) { }
};
typedef CompileTimeRankTable<256> Ranktable;
// Interesting bit tricks that might end up here:
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
// Fast a % (k*2^t)

View File

@ -4,6 +4,8 @@
#include "mph_bits.h"
using cxxmph::dynamic_2bitset;
using cxxmph::Ranktable;
int main(int argc, char** argv) {
dynamic_2bitset small(256, true);
for (int i = 0; i < small.size(); ++i) small.set(i, i % 4);
@ -52,6 +54,12 @@ int main(int argc, char** argv) {
empty.clear();
dynamic_2bitset large(1000, true);
empty.swap(large);
Ranktable ranktable;
if (ranktable[0] != 0) exit(-1);
if (ranktable[1] != 1) exit(-1);
if (ranktable[2] != 1) exit(-1);
if (ranktable[255] != 8) exit(-1);
}

View File

@ -112,10 +112,11 @@ class mph_map {
std::vector<value_type> values_;
std::vector<bool> present_;
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
// TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
// TODO(davi) optimize slack to use hash from index rather than calculate its own
typedef unordered_map<h128, uint32_t, h128::hash32> slack_type;
slack_type slack_;
size_type size_;
typename seeded_hash<HashFcn>::hash_function hasher128_;
mutable uint64_t fast_;
mutable uint64_t fast_taken_;
@ -148,7 +149,9 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
values_.push_back(x);
present_.push_back(true);
++size_;
slack_.insert(make_pair(x.first, values_.size() - 1));
h128 h = hasher128_.hash128(x.first, 0);
if (slack_.find(h) != slack_.end()) should_pack = true; // unavoidable pack
else slack_.insert(std::make_pair(h, values_.size() - 1));
if (should_pack) pack();
it = find(x.first);
slow_ = 0;
@ -218,7 +221,7 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe
}
if (__builtin_expect(!slack_.empty(), 0)) {
++very_slow_;
auto sit = slack_.find(k);
auto sit = slack_.find(hasher128_.hash128(k, 0));
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
}
return end();
@ -233,7 +236,7 @@ MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_has
}
if (__builtin_expect(!slack_.empty(), 0)) {
++very_slow_;
auto sit = slack_.find(k);
auto sit = slack_.find(hasher128_.hash128(k, 0));
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
}
return end();

View File

@ -4,6 +4,7 @@
#include <stdint.h> // for uint32_t and friends
#include <cstdlib>
#include <cstring>
#include <unordered_map> // for std::hash
#include "MurmurHash3.h"
@ -17,13 +18,15 @@ uint64_t fmix ( uint64_t h );
namespace cxxmph {
// 128-bit hash value stored as four 32-bit words.
//
// get64()/set64() treat the value as two 64-bit halves: half 0 occupies
// uint32[0..1] and half 1 occupies uint32[2..3], each with its upper 32 bits
// in the lower-indexed word. That mapping is done with shifts, so it does not
// depend on host byte order. uint64()/uint64ptr() expose the same storage in
// native byte order instead.
struct h128 {
  // Access to word i. i must be in [0, 4); it is not checked.
  const uint32_t& operator[](uint8_t i) const { return uint32[i]; }
  uint32_t& operator[](uint8_t i) { return uint32[i]; }

  // Pointer to half `second` of the storage, viewed as a native uint64_t.
  // NOTE(review): this type-puns a uint32_t array as uint64_t, which the
  // language does not guarantee to be correctly aligned or alias-safe.
  // Kept only so existing callers keep compiling; prefer set64().
  uint64_t* uint64ptr(bool second) { return reinterpret_cast<uint64_t*>(&uint32[static_cast<uint8_t>(second) << 1]); }

  // Half `second` of the storage read as a native-byte-order uint64_t.
  // Copies the bytes out instead of dereferencing a punned pointer.
  uint64_t uint64(bool second) const {
    uint64_t native;
    std::memcpy(&native, &uint32[static_cast<uint8_t>(second) << 1], sizeof(native));
    return native;
  }

  // Half `second` as (high word << 32) | low word.
  uint64_t get64(bool second) const {
    const int base = second ? 2 : 0;
    return (static_cast<uint64_t>(uint32[base]) << 32) | uint32[base + 1];
  }

  // Stores v into half `second`; the inverse of get64().
  void set64(uint64_t v, bool second) {
    const int base = second ? 2 : 0;
    uint32[base] = static_cast<uint32_t>(v >> 32);
    uint32[base + 1] = static_cast<uint32_t>(v & 0xffffffffu);
  }

  // Two values are equal when all four words match.
  bool operator==(const h128& rhs) const { return std::memcmp(uint32, rhs.uint32, sizeof(uint32)) == 0; }

  uint32_t uint32[4];

  // Hash functor for unordered containers keyed by h128: returns the last word.
  struct hash32 { uint32_t operator()(const h128& h) const { return h[3]; } };
};
template <class HashFcn>
@ -83,8 +86,8 @@ struct Murmur3Fmix64bitsType {
// Builds a 128-bit hash from k: the first 64-bit half is fmix(k) and the
// second half is fmix applied to the first half. k is passed straight to
// fmix, so Key must be usable where fmix expects its argument.
template <class Key>
h128 hash128(const Key& k) const {
  h128 h;
  h.set64(fmix(k), 0);
  // Chain the second half off the value just stored in the first half.
  h.set64(fmix(h.get64(0)), 1);
  // Without this return, control would flow off the end of a value-returning
  // function, which is undefined behaviour.
  return h;
}
};
@ -131,8 +134,8 @@ struct seeded_hash_function<Murmur3Fmix64bitsType> {
// Seeded variant: the first 64-bit half is fmix(k ^ seed) and the second half
// is fmix applied to the first half. k must support operator^ with a
// uint32_t and yield something fmix accepts.
template <class Key>
h128 hash128(const Key& k, uint32_t seed) const {
  h128 h;
  h.set64(fmix(k ^ seed), 0);
  // Chain the second half off the value just stored in the first half.
  h.set64(fmix(h.get64(0)), 1);
  return h;
}
};

View File

@ -0,0 +1,59 @@
#include "seeded_hash.h"
#include <unordered_map>
#include <string>
#include <iostream>
using std::cerr;
using std::endl;
using std::string;
using std::unordered_map;
using namespace cxxmph;
// Sanity checks for seeded_hash_function and h128:
//  1. two different string keys hashed with the same seed do not collide;
//  2. Murmur3Fmix64bitsType works as the hasher of an unordered_map;
//  3. h128 works as an unordered_map key with h128::hash32 (equality and
//     hashing are stable across repeated hash128 calls on the same input).
// Any failure terminates the process with exit(-1).
int main(int argc, char** argv) {
  auto hasher = seeded_hash_function<Murmur3StringPiece>();
  string key1("0");
  string key2("1");
  auto h1 = hasher.hash128(key1, 1);
  auto h2 = hasher.hash128(key2, 1);
  if (h1 == h2) {
    cerr << "unexpected murmur collision" << endl;
    exit(-1);
  }

  unordered_map<uint64_t, int, Murmur3Fmix64bitsType> g;
  for (int i = 0; i < 1000; ++i) g[i] = i;
  for (int i = 0; i < 1000; ++i) {
    // Use find(): operator[] would insert 0 for a missing key and so could
    // not detect a lost entry for i == 0.
    auto it = g.find(i);
    if (it == g.end() || it->second != i) exit(-1);
  }

  auto inthasher = seeded_hash_function<std::hash<uint64_t>>();
  unordered_map<h128, int, h128::hash32> g2;
  for (uint64_t i = 0; i < 1000; ++i) {
    auto h = inthasher.hash128(i, 0);
    if (g2.find(h) != g2.end()) {
      cerr << "Incorrectly found " << i << endl;
      exit(-1);
    }
    if (h128::hash32()(h) != h[3]) {
      cerr << "Buggy hash method." << endl;
      exit(-1);
    }
    // Hash the same input again; the result must compare and hash equal.
    auto repeat = inthasher.hash128(i, 0);
    if (!(h == repeat)) {
      cerr << "h 64(0) " << h.get64(0) << " h 64(1) " << h.get64(1) << endl;
      cerr << " h2 64(0) " << repeat.get64(0) << " h2 64(1) " << repeat.get64(1) << endl;
      cerr << "Broken equality for h128" << endl;
      exit(-1);
    }
    if (h128::hash32()(h) != h128::hash32()(repeat)) {
      cerr << "Inconsistent hash method." << endl;
      exit(-1);
    }
    g2[h] = static_cast<int>(i);
    if (g2.find(h) == g2.end()) {
      cerr << "Incorrectly missed " << i << endl;
      exit(-1);
    }
  }

  for (uint64_t i = 0; i < 1000; ++i) {
    auto it = g2.find(inthasher.hash128(i, 0));
    if (it == g2.end() || static_cast<uint64_t>(it->second) != i) exit(-1);
  }
  return 0;
}