2010-10-25 05:12:47 +03:00
|
|
|
#ifndef __CXXMPH_MPHTABLE_H__
|
|
|
|
#define __CXXMPH_MPHTABLE_H__
|
|
|
|
|
2010-09-10 10:07:06 +03:00
|
|
|
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
|
|
|
|
2010-10-28 03:17:09 +03:00
|
|
|
#include <cmath>
|
2010-10-05 17:51:17 +03:00
|
|
|
#include <vector>
|
|
|
|
|
2010-10-28 03:17:09 +03:00
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
using std::cerr;
|
|
|
|
using std::endl;
|
|
|
|
|
2010-10-25 05:12:47 +03:00
|
|
|
#include "randomly_seeded_hash.h"
|
|
|
|
#include "stringpiece.h"
|
2010-09-10 10:07:06 +03:00
|
|
|
#include "trigraph.h"
|
2010-06-28 22:01:18 +03:00
|
|
|
|
2010-10-25 05:12:47 +03:00
|
|
|
namespace cxxmph {
|
|
|
|
|
2010-06-28 22:01:18 +03:00
|
|
|
class MPHTable {
|
|
|
|
public:
|
2010-10-28 03:17:09 +03:00
|
|
|
// This class could be a template for both key type and hash function, but we
|
|
|
|
// chose to go with simplicity.
|
|
|
|
typedef StringPiece key_type;
|
2010-10-29 03:53:40 +03:00
|
|
|
typedef RandomlySeededHashFunction<Murmur2StringPiece> hasher_type;
|
2010-10-28 03:17:09 +03:00
|
|
|
|
2010-10-25 05:12:47 +03:00
|
|
|
MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
|
2010-10-28 03:17:09 +03:00
|
|
|
~MPHTable() {}
|
2010-06-28 22:01:18 +03:00
|
|
|
|
2010-10-05 17:51:17 +03:00
|
|
|
template <class ForwardIterator>
|
2010-09-10 10:07:06 +03:00
|
|
|
bool Reset(ForwardIterator begin, ForwardIterator end);
|
2010-06-28 22:01:18 +03:00
|
|
|
cmph_uint32 index(const key_type& x) const;
|
|
|
|
|
|
|
|
private:
|
2010-10-25 05:12:47 +03:00
|
|
|
template <class ForwardIterator>
|
|
|
|
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
2010-10-28 03:17:09 +03:00
|
|
|
std::vector<TriGraph::Edge>* edges,
|
|
|
|
std::vector<cmph_uint32>* queue);
|
|
|
|
bool GenerateQueue(TriGraph* graph, std::vector<cmph_uint32>* queue);
|
|
|
|
void Assigning(const std::vector<TriGraph::Edge>& edges,
|
|
|
|
const std::vector<cmph_uint32>& queue);
|
|
|
|
void Ranking();
|
|
|
|
cmph_uint32 Search(const key_type& key) const;
|
|
|
|
cmph_uint32 Rank(cmph_uint32 vertex) const;
|
2010-10-25 05:12:47 +03:00
|
|
|
|
|
|
|
// Algorithm parameters
|
|
|
|
double c_; // Number of bits per key (? is it right)
|
2010-10-28 03:17:09 +03:00
|
|
|
cmph_uint8 b_; // Number of bits of the kth index in the ranktable
|
2010-10-25 05:12:47 +03:00
|
|
|
|
|
|
|
// Values used during generation
|
|
|
|
cmph_uint32 m_; // edges count
|
|
|
|
cmph_uint32 n_; // vertex count
|
2010-10-28 03:17:09 +03:00
|
|
|
cmph_uint32 k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
2010-10-25 05:12:47 +03:00
|
|
|
|
|
|
|
// Values used during search
|
|
|
|
|
|
|
|
// Partition vertex count, derived from c parameter.
|
|
|
|
cmph_uint32 r_;
|
|
|
|
// The array containing the minimal perfect hash function graph.
|
|
|
|
std::vector<cmph_uint8> g_;
|
|
|
|
// The table used for the rank step of the minimal perfect hash function
|
|
|
|
std::vector<cmph_uint32> ranktable_;
|
|
|
|
// The selected hash function triplet for finding the edges in the minimal
|
|
|
|
// perfect hash function graph.
|
2010-10-28 03:17:09 +03:00
|
|
|
hasher_type hash_function_[3];
|
2010-10-25 05:12:47 +03:00
|
|
|
|
2010-06-28 22:01:18 +03:00
|
|
|
};
|
|
|
|
|
2010-10-28 03:17:09 +03:00
|
|
|
// Template method needs to go in the header file.
|
|
|
|
template <class ForwardIterator>
|
|
|
|
bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
|
|
|
m_ = end - begin;
|
|
|
|
r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3));
|
|
|
|
if ((r_ % 2) == 0) r_ += 1;
|
|
|
|
n_ = 3*r_;
|
|
|
|
k_ = 1U << b_;
|
|
|
|
|
|
|
|
cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
|
|
|
|
|
|
|
|
int iterations = 1000;
|
|
|
|
std::vector<TriGraph::Edge> edges;
|
|
|
|
std::vector<cmph_uint32> queue;
|
|
|
|
while (1) {
|
|
|
|
cerr << "Iterations missing: " << iterations << endl;
|
2010-10-29 03:53:40 +03:00
|
|
|
for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type();
|
|
|
|
// hash_function_[0] = hasher_type();
|
2010-10-28 05:45:43 +03:00
|
|
|
cerr << "Seed: " << hash_function_[0].seed << endl;
|
2010-10-28 03:17:09 +03:00
|
|
|
if (Mapping(begin, end, &edges, &queue)) break;
|
|
|
|
else --iterations;
|
|
|
|
if (iterations == 0) break;
|
|
|
|
}
|
|
|
|
if (iterations == 0) return false;
|
|
|
|
Assigning(edges, queue);
|
|
|
|
std::vector<TriGraph::Edge>().swap(edges);
|
|
|
|
Ranking();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class ForwardIterator>
|
|
|
|
bool MPHTable::Mapping(
|
|
|
|
ForwardIterator begin, ForwardIterator end,
|
|
|
|
std::vector<TriGraph::Edge>* edges, std::vector<cmph_uint32>* queue) {
|
|
|
|
TriGraph graph(n_, m_);
|
|
|
|
for (ForwardIterator it = begin; it != end; ++it) {
|
|
|
|
cmph_uint32 h[3];
|
2010-10-29 03:53:40 +03:00
|
|
|
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
|
|
|
|
// hash_function_[0](*it, h);
|
2010-10-28 03:17:09 +03:00
|
|
|
cmph_uint32 v0 = h[0] % r_;
|
|
|
|
cmph_uint32 v1 = h[1] % r_ + r_;
|
|
|
|
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
|
2010-10-28 05:45:43 +03:00
|
|
|
cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
2010-10-28 03:17:09 +03:00
|
|
|
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
|
|
|
}
|
|
|
|
if (GenerateQueue(&graph, queue)) {
|
|
|
|
graph.ExtractEdgesAndClear(edges);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2010-10-25 05:12:47 +03:00
|
|
|
} // namespace cxxmph
|
2010-06-28 22:01:18 +03:00
|
|
|
|
2010-10-28 03:17:09 +03:00
|
|
|
#endif // __CXXMPH_MPHTABLE_H__
|