turbonss/cxxmph/mphtable.h

159 lines
5.0 KiB
C
Raw Normal View History

#ifndef __CXXMPH_MPHTABLE_H__
#define __CXXMPH_MPHTABLE_H__
2010-09-10 10:07:06 +03:00
// Minimal perfect hash abstraction implementing the BDZ algorithm
2011-02-14 00:40:26 +02:00
#include <stdint.h>
2010-11-09 02:02:18 +02:00
#include <cassert>
2010-10-28 03:17:09 +03:00
#include <cmath>
2010-11-08 22:19:44 +02:00
#include <unordered_map> // for std::hash
2010-10-05 17:51:17 +03:00
#include <vector>
2010-10-28 03:17:09 +03:00
#include <iostream>
using std::cerr;
using std::endl;
2011-02-14 00:40:26 +02:00
#include "cxxmph_hash.h"
2010-09-10 10:07:06 +03:00
#include "trigraph.h"
2010-06-28 22:01:18 +03:00
namespace cxxmph {
2010-06-28 22:01:18 +03:00
class MPHTable {
public:
2011-02-14 00:40:26 +02:00
MPHTable(double c = 1.23, uint8_t b = 7) :
2010-11-05 04:17:08 +02:00
c_(c), b_(b), m_(0), n_(0), k_(0), r_(0) { }
2010-10-28 03:17:09 +03:00
~MPHTable() {}
2010-06-28 22:01:18 +03:00
2010-11-05 08:40:15 +02:00
template <class SeededHashFcn, class ForwardIterator>
2010-09-10 10:07:06 +03:00
bool Reset(ForwardIterator begin, ForwardIterator end);
2010-11-05 08:40:15 +02:00
template <class SeededHashFcn, class Key> // must agree with Reset
2011-02-14 00:40:26 +02:00
uint32_t index(const Key& x) const;
uint32_t size() const { return m_; }
2010-10-29 09:26:37 +03:00
void clear();
2010-06-28 22:01:18 +03:00
private:
2010-11-05 08:40:15 +02:00
template <class SeededHashFcn, class ForwardIterator>
bool Mapping(ForwardIterator begin, ForwardIterator end,
2010-10-28 03:17:09 +03:00
std::vector<TriGraph::Edge>* edges,
2011-02-14 00:40:26 +02:00
std::vector<uint32_t>* queue);
bool GenerateQueue(TriGraph* graph, std::vector<uint32_t>* queue);
2010-10-28 03:17:09 +03:00
void Assigning(const std::vector<TriGraph::Edge>& edges,
2011-02-14 00:40:26 +02:00
const std::vector<uint32_t>& queue);
2010-10-28 03:17:09 +03:00
void Ranking();
2011-02-14 00:40:26 +02:00
uint32_t Rank(uint32_t vertex) const;
// Algorithm parameters
double c_; // Number of bits per key (? is it right)
2011-02-14 00:40:26 +02:00
uint8_t b_; // Number of bits of the kth index in the ranktable
// Values used during generation
2011-02-14 00:40:26 +02:00
uint32_t m_; // edges count
uint32_t n_; // vertex count
uint32_t k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
// Values used during search
// Partition vertex count, derived from c parameter.
2011-02-14 00:40:26 +02:00
uint32_t r_;
// The array containing the minimal perfect hash function graph.
2011-02-14 00:40:26 +02:00
std::vector<uint8_t> g_;
// The table used for the rank step of the minimal perfect hash function
2011-02-14 00:40:26 +02:00
std::vector<uint32_t> ranktable_;
2010-11-05 08:40:15 +02:00
// The selected hash seed triplet for finding the edges in the minimal
// perfect hash function graph.
2011-02-14 00:40:26 +02:00
uint32_t hash_seed_[3];
2010-11-05 08:40:15 +02:00
2011-02-14 00:40:26 +02:00
static const uint8_t valuemask[];
static void set_2bit_value(std::vector<uint8_t> *d, uint32_t i, uint8_t v) {
2010-11-05 08:40:15 +02:00
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
}
2011-02-14 00:40:26 +02:00
static uint32_t get_2bit_value(const std::vector<uint8_t>& d, uint32_t i) {
2010-11-05 08:40:15 +02:00
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
}
2010-06-28 22:01:18 +03:00
};
2010-10-28 03:17:09 +03:00
// Template member functions must be defined in the header file.
2010-11-05 08:40:15 +02:00
template <class SeededHashFcn, class ForwardIterator>
2010-10-28 03:17:09 +03:00
bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
m_ = end - begin;
2011-02-14 00:40:26 +02:00
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
2010-10-28 03:17:09 +03:00
if ((r_ % 2) == 0) r_ += 1;
n_ = 3*r_;
k_ = 1U << b_;
2011-02-19 00:15:24 +02:00
// cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
2010-10-28 03:17:09 +03:00
2010-11-09 07:38:46 +02:00
int iterations = 10;
2010-10-28 03:17:09 +03:00
std::vector<TriGraph::Edge> edges;
2011-02-14 00:40:26 +02:00
std::vector<uint32_t> queue;
2010-10-28 03:17:09 +03:00
while (1) {
2011-02-19 00:15:24 +02:00
// cerr << "Iterations missing: " << iterations << endl;
2010-11-09 07:38:46 +02:00
for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
// for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i;
2010-11-05 08:40:15 +02:00
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
2010-10-28 03:17:09 +03:00
else --iterations;
if (iterations == 0) break;
}
if (iterations == 0) return false;
Assigning(edges, queue);
std::vector<TriGraph::Edge>().swap(edges);
Ranking();
return true;
}
2010-11-05 08:40:15 +02:00
template <class SeededHashFcn, class ForwardIterator>
2010-10-28 03:17:09 +03:00
bool MPHTable::Mapping(
ForwardIterator begin, ForwardIterator end,
2011-02-14 00:40:26 +02:00
std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
2010-10-28 03:17:09 +03:00
TriGraph graph(n_, m_);
for (ForwardIterator it = begin; it != end; ++it) {
2011-02-14 00:40:26 +02:00
uint32_t h[3];
2010-11-05 08:40:15 +02:00
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
2011-02-14 00:40:26 +02:00
uint32_t v0 = h[0] % r_;
uint32_t v1 = h[1] % r_ + r_;
uint32_t v2 = h[2] % r_ + (r_ << 1);
2010-11-09 07:51:33 +02:00
// cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
2010-10-28 03:17:09 +03:00
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
}
if (GenerateQueue(&graph, queue)) {
graph.ExtractEdgesAndClear(edges);
return true;
}
return false;
}
2010-11-05 08:40:15 +02:00
template <class SeededHashFcn, class Key>
2011-02-14 00:40:26 +02:00
uint32_t MPHTable::index(const Key& key) const {
uint32_t h[3];
2010-11-05 08:40:15 +02:00
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
h[0] = h[0] % r_;
h[1] = h[1] % r_ + r_;
h[2] = h[2] % r_ + (r_ << 1);
2010-11-09 02:02:18 +02:00
assert(g_.size());
2010-11-09 07:51:33 +02:00
//cerr << "g_.size() " << g_.size() << " h0 >> 2 " << (h[0] >> 2) << endl;
2010-11-09 02:02:18 +02:00
assert((h[0] >> 2) <g_.size());
assert((h[1] >> 2) <g_.size());
assert((h[2] >> 2) <g_.size());
2011-02-14 00:40:26 +02:00
uint32_t vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
2010-11-09 07:51:33 +02:00
// cerr << "Search found vertex " << vertex << endl;
2010-11-05 08:40:15 +02:00
return Rank(vertex);
}
2011-02-14 00:40:26 +02:00
template <class Key, class HashFcn = typename cxxmph_hash<std::hash<Key> >::hash_function>
2010-11-05 08:40:15 +02:00
class SimpleMPHTable : public MPHTable {
public:
template <class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end) {
return MPHTable::Reset<HashFcn>(begin, end);
}
2011-02-14 00:40:26 +02:00
uint32_t index(const Key& key) { return MPHTable::index<HashFcn>(key); }
2010-11-05 08:40:15 +02:00
};
} // namespace cxxmph
2010-06-28 22:01:18 +03:00
2010-10-28 03:17:09 +03:00
#endif // __CXXMPH_MPHTABLE_H__