From d4d79c62bd8198d876bd22985d62a5829d43a3a5 Mon Sep 17 00:00:00 2001 From: Davi Reis Date: Tue, 20 Mar 2012 11:47:55 -0300 Subject: [PATCH] Improved hash signature. --- cxxmph/Makefile.am | 3 +++ cxxmph/mph_index.h | 16 ++----------- cxxmph/mph_map.h | 2 -- cxxmph/seeded_hash.h | 55 +++++++++++++++++++++++++++++++------------- 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/cxxmph/Makefile.am b/cxxmph/Makefile.am index db8ffa1..0534c68 100644 --- a/cxxmph/Makefile.am +++ b/cxxmph/Makefile.am @@ -27,6 +27,9 @@ cxxmph_LDADD = libcxxmph.la cxxmph_SOURCES = cxxmph.cc hollow_iterator_test_SOURCES = hollow_iterator_test.cc +seeded_hash_test_SOURCES = seeded_hash_test.cc + mph_bits_test_SOURCES = mph_bits_test.cc mph_bits_test_LDADD = libcxxmph.la +mph_bits_test_LDADD = libcxxmph.la diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h index 2a217bc..6982534 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -68,10 +68,6 @@ class MPHIndex { template // must agree with Reset uint32_t minimal_perfect_hash(const Key& x) const; - // Crazy functions. Ignore. - template // must agree with Reset - void hash_vector(const Key& x, uint32_t* h) const; - private: template bool Mapping(ForwardIterator begin, ForwardIterator end, @@ -160,8 +156,7 @@ bool MPHIndex::Mapping( std::vector* edges, std::vector* queue) { TriGraph graph(n_, m_); for (ForwardIterator it = begin; it != end; ++it) { - uint32_t h[4]; - SeededHashFcn().hash64(*it, hash_seed_[0], reinterpret_cast(&h)); + h128 h = SeededHashFcn().hash128(*it, hash_seed_[0]); // for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]); uint32_t v0 = h[0] % r_; uint32_t v1 = h[1] % r_ + r_; @@ -176,16 +171,10 @@ bool MPHIndex::Mapping( return false; } -template -void MPHIndex::hash_vector(const Key& key, uint32_t* h) const { - SeededHashFcn().hash64(key, hash_seed_[0], h); -} - template uint32_t MPHIndex::perfect_hash(const Key& key) const { - uint32_t h[4]; if (!g_.size()) return 0; - SeededHashFcn().hash64(key, hash_seed_[0], h); + h128 h = SeededHashFcn().hash128(key, hash_seed_[0]); h[0] = (h[0] % r_) + nest_displacement_[0]; h[1] = (h[1] % r_) + nest_displacement_[1]; h[2] = (h[2] % r_) + nest_displacement_[2]; @@ -222,7 +211,6 @@ class SimpleMPHIndex : public MPHIndex { uint32_t index(const Key& key) const { return MPHIndex::index(key); } uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash(key); } uint32_t minimal_perfect_hash(const Key& key) const { return MPHIndex::minimal_perfect_hash(key); } - void hash_vector(const Key& key, uint32_t* h) const { MPHIndex::hash_vector(key, h); } }; } // namespace cxxmph diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index 9440fe8..60a52a6 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -199,8 +199,6 @@ MPH_MAP_METHOD_DECL(void_type, clear)() { MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) { present_[pos - begin] = false; - uint32_t h[4]; - index_.hash_vector(pos->first, &h); *pos = value_type(); --size_; } diff --git a/cxxmph/seeded_hash.h b/cxxmph/seeded_hash.h index 0979ef1..be0b39e 100644 --- a/cxxmph/seeded_hash.h +++ b/cxxmph/seeded_hash.h @@ -16,6 +16,16 @@ uint64_t fmix ( uint64_t h ); namespace cxxmph { +struct h128 { + uint32_t operator[](uint8_t i) const { return uint32[i]; } + uint32_t& operator[](uint8_t i) { return uint32[i]; } + uint64_t* uint64ptr(bool second) { return reinterpret_cast(&uint32[static_cast(second) << 1]); } + uint64_t uint64(bool second) const { return *reinterpret_cast(&uint32[static_cast(second) << 1]); } + bool operator==(const h128 rhs) const { return uint64(0) == rhs.uint64(0) && uint64(1) == rhs.uint64(1); } + + uint32_t uint32[4]; +}; + template struct seeded_hash_function { template @@ -23,11 +33,13 @@ struct seeded_hash_function { return HashFcn()(k) ^ seed; } template - void hash64(const Key& k, uint32_t seed, uint32_t* out) const { + h128 hash128(const Key& k, uint32_t seed) const { + h128 h; for (int i = 0; i < 4; ++i) { - out[i] = HashFcn()(k) ^ seed; + h.uint32[i] = HashFcn()(k) ^ seed; seed = fmix(seed); } + return h; } }; @@ -39,8 +51,10 @@ struct Murmur3 { return out; } template - void hash64(const Key& k, uint32_t* out) const { - MurmurHash3_x64_128(reinterpret_cast(&k), sizeof(Key), 1 /* seed */, out); + h128 hash128(const Key& k) const { + h128 h; + MurmurHash3_x64_128(reinterpret_cast(&k), sizeof(Key), 1 /* seed */, &h); + return h; } }; @@ -53,9 +67,11 @@ struct Murmur3StringPiece { return out; } template - void hash64(const Key& k, uint32_t* out) const { + h128 hash128(const Key& k) const { + h128 h; StringPiece s(k); - MurmurHash3_x64_128(s.data(), s.length(), 1 /* seed */, out); + MurmurHash3_x64_128(s.data(), s.length(), 1 /* seed */, &h); + return h; } }; @@ -65,9 +81,10 @@ struct Murmur3Fmix64bitsType { return fmix(*reinterpret_cast(&k)); } template - void hash64(const Key& k, uint32_t* out) const { - *reinterpret_cast(out) = fmix(k); - *(out + 2) = fmix(*out); + h128 hash128(const Key& k) const { + h128 h; + *h.uint64ptr(0) = fmix(k); + *h.uint64ptr(1) = fmix(h.uint64(0)); } }; @@ -80,8 +97,10 @@ struct seeded_hash_function { return out; } template - void hash64(const Key& k, uint32_t seed, uint32_t* out) const { - MurmurHash3_x64_128(reinterpret_cast(&k), sizeof(Key), seed, out); + h128 hash128(const Key& k, uint32_t seed) const { + h128 h; + MurmurHash3_x64_128(reinterpret_cast(&k), sizeof(Key), seed, &h); + return h; } }; @@ -95,9 +114,11 @@ struct seeded_hash_function { return out; } template - void hash64(const Key& k, uint32_t seed, uint32_t* out) const { + h128 hash128(const Key& k, uint32_t seed) const { + h128 h; StringPiece s(k); - MurmurHash3_x64_128(s.data(), s.length(), seed, out); + MurmurHash3_x64_128(s.data(), s.length(), seed, &h); + return h; } }; @@ -108,9 +129,11 @@ struct seeded_hash_function { return fmix(k + seed); } template - void hash64(const Key& k, uint32_t seed, uint32_t* out) const { - *reinterpret_cast(out) = fmix(k ^ seed); - *(out + 2) = fmix(*out); + h128 hash128(const Key& k, uint32_t seed) const { + h128 h; + *h.uint64ptr(0) = fmix(k ^ seed); + *h.uint64ptr(1) = fmix(h.uint64(0)); + return h; } };