turbonss/cxxmph/seeded_hash.h

178 lines
5.5 KiB
C
Raw Normal View History

2011-05-16 02:47:42 +03:00
#ifndef __CXXMPH_SEEDED_HASH_H__
#define __CXXMPH_SEEDED_HASH_H__
2011-02-14 00:40:26 +02:00
#include <stdint.h> // for uint32_t and friends
2010-11-05 08:40:15 +02:00
#include <cstdlib>
2011-11-10 20:44:37 +02:00
#include <unordered_map> // for std::hash
2010-11-05 08:40:15 +02:00
2012-03-14 00:34:24 +02:00
#include "MurmurHash3.h"
2010-11-05 08:40:15 +02:00
#include "stringpiece.h"
// From Murmur; used here only as a naive way to extend 32-bit hash functions to 128 bits.
2012-03-14 00:34:24 +02:00
// 32-bit overload. Declaration only: this header provides no definition, and
// the function lives at global scope rather than inside namespace cxxmph.
uint32_t fmix ( uint32_t h );
2012-03-15 23:14:39 +02:00
// Used as a quick-and-dirty hash function for integers. Probably a bad idea.
// 64-bit overload. Declaration only: this header provides no definition, and
// the function lives at global scope rather than inside namespace cxxmph.
uint64_t fmix ( uint64_t h );
2012-03-14 00:34:24 +02:00
2010-11-05 08:40:15 +02:00
namespace cxxmph {
2012-03-20 16:47:55 +02:00
// 128-bit hash value stored as four 32-bit words. The same storage can also
// be viewed as two 64-bit halves through uint64ptr() and uint64().
struct h128 {
  // Returns the i-th 32-bit word. No bounds check is performed, so i must be
  // in the range [0, 3].
  uint32_t operator[](uint8_t i) const { return uint32[i]; }

  // Mutable access to the i-th 32-bit word; same precondition as above.
  uint32_t& operator[](uint8_t i) { return uint32[i]; }

  // Pointer to one 64-bit half of the value: words 0-1 when `second` is
  // false, words 2-3 when it is true.
  uint64_t* uint64ptr(bool second) {
    return reinterpret_cast<uint64_t*>(&uint32[static_cast<uint8_t>(second) << 1]);
  }

  // Value of one 64-bit half, selected the same way as in uint64ptr().
  uint64_t uint64(bool second) const {
    return *reinterpret_cast<const uint64_t*>(&uint32[static_cast<uint8_t>(second) << 1]);
  }

  // Two values are equal when both 64-bit halves match. The argument is taken
  // by reference so that a comparison does not copy the right-hand side.
  bool operator==(const h128& rhs) const {
    return uint64(0) == rhs.uint64(0) && uint64(1) == rhs.uint64(1);
  }

  // The words are accessed through uint64_t pointers above, so the array must
  // be aligned for uint64_t; a plain uint32_t array only guarantees 4 bytes.
  alignas(uint64_t) uint32_t uint32[4];
};
2010-11-05 08:40:15 +02:00
template <class HashFcn>
struct seeded_hash_function {
template <class Key>
2011-02-14 00:40:26 +02:00
uint32_t operator()(const Key& k, uint32_t seed) const {
2010-11-05 08:40:15 +02:00
return HashFcn()(k) ^ seed;
}
2012-03-14 00:34:24 +02:00
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
2012-03-14 00:34:24 +02:00
for (int i = 0; i < 4; ++i) {
2012-03-20 16:47:55 +02:00
h.uint32[i] = HashFcn()(k) ^ seed;
2012-03-14 00:34:24 +02:00
seed = fmix(seed);
}
2012-03-20 16:47:55 +02:00
return h;
2012-03-14 00:34:24 +02:00
}
2010-11-05 08:40:15 +02:00
};
2012-03-14 00:34:24 +02:00
struct Murmur3 {
2010-11-05 08:40:15 +02:00
template<class Key>
2011-02-14 00:40:26 +02:00
uint32_t operator()(const Key& k) const {
2012-03-14 00:34:24 +02:00
uint32_t out;
MurmurHash3_x86_32(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */, &out);
return out;
}
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k) const {
h128 h;
MurmurHash3_x64_128(reinterpret_cast<const void*>(&k), sizeof(Key), 1 /* seed */, &h);
return h;
2010-11-05 08:40:15 +02:00
}
};
2012-03-14 00:34:24 +02:00
struct Murmur3StringPiece {
2010-11-05 08:40:15 +02:00
template <class Key>
2011-02-14 00:40:26 +02:00
uint32_t operator()(const Key& k) const {
2010-11-05 08:40:15 +02:00
StringPiece s(k);
2012-03-14 00:34:24 +02:00
uint32_t out;
MurmurHash3_x86_32(s.data(), s.length(), 1 /* seed */, &out);
return out;
}
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k) const {
h128 h;
2012-03-14 00:34:24 +02:00
StringPiece s(k);
2012-03-20 16:47:55 +02:00
MurmurHash3_x64_128(s.data(), s.length(), 1 /* seed */, &h);
return h;
2010-11-05 08:40:15 +02:00
}
};
struct Murmur3Fmix64bitsType {
template <class Key>
uint32_t operator()(const Key& k) const {
return fmix(*reinterpret_cast<const uint64_t*>(&k));
}
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k) const {
h128 h;
*h.uint64ptr(0) = fmix(k);
*h.uint64ptr(1) = fmix(h.uint64(0));
}
};
2010-11-05 08:40:15 +02:00
template <>
2012-03-14 00:34:24 +02:00
struct seeded_hash_function<Murmur3> {
2010-11-05 08:40:15 +02:00
template <class Key>
2011-02-14 00:40:26 +02:00
uint32_t operator()(const Key& k, uint32_t seed) const {
2012-03-14 00:34:24 +02:00
uint32_t out;
MurmurHash3_x86_32(reinterpret_cast<const void*>(&k), sizeof(Key), seed, &out);
return out;
}
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
MurmurHash3_x64_128(reinterpret_cast<const void*>(&k), sizeof(Key), seed, &h);
return h;
2010-11-05 08:40:15 +02:00
}
};
template <>
2012-03-14 00:34:24 +02:00
struct seeded_hash_function<Murmur3StringPiece> {
2010-11-05 08:40:15 +02:00
template <class Key>
2011-02-14 00:40:26 +02:00
uint32_t operator()(const Key& k, uint32_t seed) const {
2010-11-05 08:40:15 +02:00
StringPiece s(k);
2012-03-14 00:34:24 +02:00
uint32_t out;
MurmurHash3_x86_32(s.data(), s.length(), seed, &out);
return out;
}
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
2012-03-14 00:34:24 +02:00
StringPiece s(k);
2012-03-20 16:47:55 +02:00
MurmurHash3_x64_128(s.data(), s.length(), seed, &h);
return h;
2010-11-05 08:40:15 +02:00
}
};
template <>
struct seeded_hash_function<Murmur3Fmix64bitsType> {
template <class Key>
uint32_t operator()(const Key& k, uint32_t seed) const {
return fmix(k + seed);
}
template <class Key>
2012-03-20 16:47:55 +02:00
h128 hash128(const Key& k, uint32_t seed) const {
h128 h;
*h.uint64ptr(0) = fmix(k ^ seed);
*h.uint64ptr(1) = fmix(h.uint64(0));
return h;
}
};
2011-05-16 02:47:42 +03:00
template <class HashFcn> struct seeded_hash
2010-11-05 08:40:15 +02:00
{ typedef seeded_hash_function<HashFcn> hash_function; };
2012-03-14 00:34:24 +02:00
// Use Murmur3 instead for all types defined in std::hash, plus
2010-11-05 08:40:15 +02:00
// std::string which is commonly extended.
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<char*> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<const char*> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<std::string> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<cxxmph::StringPiece> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3StringPiece> hash_function; };
2010-11-05 08:40:15 +02:00
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<char> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<unsigned char> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<short> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<unsigned short> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<int> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<unsigned int> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<long> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<unsigned long> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<long long> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2011-11-10 20:44:37 +02:00
template <> struct seeded_hash<std::hash<unsigned long long> >
2012-03-14 00:34:24 +02:00
{ typedef seeded_hash_function<Murmur3> hash_function; };
2010-11-05 08:40:15 +02:00
} // namespace cxxmph
2011-05-16 02:47:42 +03:00
#endif // __CXXMPH_SEEDED_HASH_H__