Lots of work.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
bin_PROGRAMS = cmph_hash_map_test
|
||||
lib_LTLIBRARIES = libcxxmph.la
|
||||
|
||||
INCLUDES = -I../src/
|
||||
# Sources of libcxxmph.la. The header is trigraph.h, the name the C++ sources
# use in their #include "trigraph.h" lines.
libcxxmph_la_SOURCES = trigraph.h trigraph.cc
|
||||
libcxxmph_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
cmph_hash_map_test_LDADD = ../src/libcmph.la
|
||||
cmph_hash_map_test_LDADD = libcxxmph.la
|
||||
cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include <hash_map>
|
||||
#include <ext/hash_map>
|
||||
#include <vector>
|
||||
#include <utility> // for std::pair
|
||||
|
||||
|
||||
@@ -1,37 +1,105 @@
|
||||
#include <numerical_limits>
|
||||
|
||||
// Compile-time population count of the low 8 bits of `n`.
//
// `mask` starts at bit 7 (1 << 7). Each instantiation contributes 1 when the
// bit selected by `mask` is set in `n`, then recurses with the mask shifted
// right by one. The recursion ends at the `mask == 0` specialization below.
// Bits of `n` above bit 7 are never tested.
template <int n, int mask = 1 << 7> struct bitcount {
  // The shift is parenthesized: since C++11 an unparenthesized `>>` inside a
  // template argument list is read as two closing `>` tokens.
  enum { value = ((n & mask) ? 1 : 0) + bitcount<n, (mask >> 1)>::value };
};
// Terminating case: no bits left to test.
template <int n> struct bitcount<n, 0> { enum { value = 0 }; };
|
||||
#include "mphtable.h"
|
||||
|
||||
template <int n, int current, int mask = 1 << 8> struct bitposition {
|
||||
enum
|
||||
using std::vector;
|
||||
|
||||
template <int index = 0, typename op> class CompileTimeByteTable {
|
||||
public:
|
||||
CompileTimeByteTable : current(op<index>::value) { }
|
||||
int operator[] (int i) { return *(&current + i); }
|
||||
private:
|
||||
unsigned char current;
|
||||
CompileTimeByteTable<next> next;
|
||||
};
|
||||
template <class Key, class HashFcn>
|
||||
template <class ForwardIterator>
|
||||
|
||||
static CompileTimeByteTable<256, bitcount> BitcountTable;
|
||||
void MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
TableBuilderState st;
|
||||
st.c = 1.23;
|
||||
st.b = 7;
|
||||
st.m = end - begin;
|
||||
st.r = static_cast<cmph_uint32>(ceil((st.c*st.m)/3));
|
||||
if ((st.r % 2) == 0) st.r += 1;
|
||||
st.n = 3*st.r;
|
||||
st.k = 1U << st.b;
|
||||
st.ranktablesize = static_cast<cmph_uint32>(
|
||||
ceil(st.n / static_cast<double>(st.k)));
|
||||
st.graph_builder = TriGraph(st.m, st.n); // giant copy
|
||||
st.edges_queue.resize(st.m)
|
||||
|
||||
|
||||
#define mix(a,b,c) \
|
||||
{ \
|
||||
a -= b; a -= c; a ^= (c>>13); \
|
||||
b -= c; b -= a; b ^= (a<<8); \
|
||||
c -= a; c -= b; c ^= (b>>13); \
|
||||
a -= b; a -= c; a ^= (c>>12); \
|
||||
b -= c; b -= a; b ^= (a<<16); \
|
||||
c -= a; c -= b; c ^= (b>>5); \
|
||||
a -= b; a -= c; a ^= (c>>3); \
|
||||
b -= c; b -= a; b ^= (a<<10); \
|
||||
c -= a; c -= b; c ^= (b>>15); \
|
||||
int iterations = 1000;
|
||||
while (1) {
|
||||
hasher hasher0 = HashFcn();
|
||||
ok = Mapping(st.graph_builder, st.edges_queue);
|
||||
if (ok) break;
|
||||
else --iterations;
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
if (iterations == 0) return false;
|
||||
vector<ConnectedEdge> graph;
|
||||
st.graph_builder.ExtractEdgesAndClear(&graph);
|
||||
Assigning(graph, st.edges_queue);
|
||||
vector<cmph_uint32>().swap(st.edges_queue);
|
||||
Ranking(graph);
|
||||
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
int MPHTable::GenerateQueue(
|
||||
cmph_uint32 nedges, cmph_uint32 nvertices,
|
||||
TriGraph* graph, Queue* queue) {
|
||||
cmph_uint32 queue_head = 0, queue_tail = 0;
|
||||
// Relies on vector<bool> using 1 bit per element
|
||||
vector<bool> marked_edge((nedges >> 3) + 1, false);
|
||||
queue->swap(Queue(nvertices, 0));
|
||||
for (int i = 0; i < nedges; ++i) {
|
||||
TriGraph::Edge e = graph.edges[i].vertices;
|
||||
if (graph.vertex_degree_[e.vertices[0]] == 1 ||
|
||||
graph.vertex_degree_[e.vertices[1]] == 1 ||
|
||||
graph.vertex_degree[e.vertices[2]] == 1) {
|
||||
if (!marked_edge[i]) {
|
||||
(*queue)[queue_head++] = i;
|
||||
marked_edge[i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (queue_tail != queue_head) {
|
||||
cmph_uint32 current_edge = (*queue)[queue_tail++];
|
||||
graph->RemoveEdge(current_edge);
|
||||
TriGraph::Edge e = graph->edges[current_edge];
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
cmph_uint32 v = e.vertices[i];
|
||||
if (graph->vertex_degree[v] == 1) {
|
||||
cmph_uint32 first_edge = graph->first_edge_[v];
|
||||
if (!marked_edge[first_edge) {
|
||||
queue[queue_head++] = first_edge;
|
||||
marked_edge[first_edge] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
vector<bool>().swap(marked_edge);
|
||||
return queue_head - nedges;
|
||||
}
|
||||
|
||||
static const int kMaskStepSelectTable = std::limit<char>::max;
|
||||
template <class Key, class HashFcn>
|
||||
int MPHTable::Mapping(TriGraph* graph, Queue* queue) {
|
||||
int cycles = 0;
|
||||
graph->Reset(m, n);
|
||||
for (ForwardIterator it = begin_; it != end_; ++it) {
|
||||
cmph_uint32 hash_values[3];
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
hash_values[i] = hasher_(*it);
|
||||
}
|
||||
cmph_uint32 v0 = hash_values[0] % bdz->r;
|
||||
cmph_uint32 v1 = hash_values[1] % bdz->r + bdz->r;
|
||||
cmph_uint32 v2 = hash_values[2] % bdz->r + (bdz->r << 1);
|
||||
graph->AddEdge(Edge(v0, v1, v2));
|
||||
}
|
||||
cycles = GenerateQueue(bdz->m, bdz->n, queue, graph);
|
||||
return cycles == 0;
|
||||
}
|
||||
|
||||
void MPHTable::Assigning(TriGraph* graph, Queue* queue) {
|
||||
}
|
||||
void MPHTable::Ranking(TriGraph* graph, Queue* queue) {
|
||||
}
|
||||
cmph_uint32 MPHTable::Search(const key_type& key) {
|
||||
}
|
||||
|
||||
cmph_uint32 MPHTable::Rank(const key_type& key) {
|
||||
}
|
||||
|
||||
@@ -1,83 +1,44 @@
|
||||
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "trigraph.h"
|
||||
|
||||
template <class Key>
|
||||
template <class Key, class NewRandomlySeededHashFcn = __gnu_cxx::hash<Key> >
|
||||
class MPHTable {
|
||||
public:
|
||||
typedef Key key_type;
|
||||
typedef NewRandomlySeededHashFcn hasher;
|
||||
MPHTable();
|
||||
~MPHTable();
|
||||
|
||||
template <class Iterator>
|
||||
template <class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end);
|
||||
cmph_uint32 index(const key_type& x) const;
|
||||
|
||||
private:
|
||||
typedef vector<cmph_uint32> Queue;
|
||||
typedef std::vector<cmph_uint32> Queue;
|
||||
template<class ForwardIterator>
|
||||
struct TableBuilderState {
|
||||
ForwardIterator begin;
|
||||
ForwardIterator end;
|
||||
Queue edges_queue;
|
||||
TriGraph graph_builder;
|
||||
double c;
|
||||
cmph_uint32 m;
|
||||
cmph_uint32 n;
|
||||
cmph_uint32 k;
|
||||
cmph_uint32 ranktablesize;
|
||||
};
|
||||
int GenerateQueue(
|
||||
cmph_uint32 nedges, cmph_uint32 nvertices,
|
||||
TriGraph* graph, Queue* queue);
|
||||
void Assigning(TriGraph* graph, Queue* queue);
|
||||
void Ranking(TriGraph* graph, Queue* queue);
|
||||
cmph_uint32 Search(const StringPiece& key);
|
||||
cmph_uint32 Rank(const StringPiece& key);
|
||||
|
||||
// Generates three hash values for k in a single pass.
|
||||
static hash_vector(cmph_uint32 seed, const char* k, cmph_uint32 keylen, cmph_uint32* hashes) ;
|
||||
std::vector<ConnectedEdge> graph_;
|
||||
};
|
||||
|
||||
int MPHTable::GenerateQueue(
|
||||
cmph_uint32 nedges, cmph_uint32 nvertices,
|
||||
TriGraph* graph, Queue* queue) {
|
||||
cmph_uint32 queue_head = 0, queue_tail = 0;
|
||||
vector<bool> marked_edge((nedges >> 3) + 1, false);
|
||||
queue->swap(Queue(nvertices, 0));
|
||||
for (int i = 0; i < nedges; ++i) {
|
||||
TriGraph::Edge e = graph.edges[i].vertices;
|
||||
if (graph.vertex_degree_[e.vertices[0]] == 1 ||
|
||||
graph.vertex_degree_[e.vertices[1]] == 1 ||
|
||||
graph.vertex_degree[e.vertices[2]] == 1) {
|
||||
if (!marked_edge[i]) {
|
||||
(*queue)[queue_head++] = i;
|
||||
marked_edge[i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (queue_tail != queue_head) {
|
||||
cmph_uint32 current_edge = (*queue)[queue_tail++];
|
||||
graph->RemoveEdge(current_edge);
|
||||
TriGraph::Edge e = graph->edges[current_edge];
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
cmph_uint32 v = e.vertices[i];
|
||||
if (graph->vertex_degree[v] == 1) {
|
||||
cmph_uint32 first_edge = graph->first_edge_[v];
|
||||
if (!marked_edge[first_edge) {
|
||||
queue[queue_head++] = first_edge;
|
||||
marked_edge[first_edge] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
marked_edge.swap(vector<bool>());
|
||||
return queue_head - nedges;
|
||||
}
|
||||
|
||||
int MPHTable::Mapping(TriGraph* graph, Queue* queue) {
|
||||
int cycles = 0;
|
||||
cmph_uint32 hl[3];
|
||||
graph->Reset(m, n);
|
||||
ForwardIterator it = begin;
|
||||
for (cmph_uint32 e = 0; e < end - begin; ++e) {
|
||||
cmph_uint32 h0, h1, h2;
|
||||
StringPiece key = *it;
|
||||
hash_vector(bdz->hl, key.data(), key.len(), hl);
|
||||
h0 = hl[0] % bdz->r;
|
||||
h1 = hl[1] % bdz->r + bdz->r;
|
||||
h2 = hl[2] % bdz->r + (bdz->r << 1);
|
||||
AddEdge(graph, h0, h1, h2);
|
||||
}
|
||||
cycles = GenerateQueue(bdz->m, bdz->n, queue, graph);
|
||||
return cycles == 0;
|
||||
}
|
||||
|
||||
void MPHTable::Assigning(TriGraph* graph, Queue* queue);
|
||||
void MPHTable::Ranking(TriGraph* graph, Queue* queue);
|
||||
cmph_uint32 MPHTable::Search(const StringPiece& key);
|
||||
cmph_uint32 MPHTable::Rank(const StringPiece& key);
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
#include <limits>
|
||||
|
||||
#include "trigraph.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace {
|
||||
static const cmph_uint8 kInvalidEdge = std::limits<cmph_uint8>::max;
|
||||
static const cmph_uint8 kInvalidEdge = std::numeric_limits<cmph_uint8>::max();
|
||||
}
|
||||
|
||||
TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices)
|
||||
: nedges_(0),
|
||||
edges_(nedges, 0),
|
||||
edges_(nedges),
|
||||
first_edge_(nvertices, kInvalidEdge),
|
||||
vertex_degree_(nvertices, 0) { }
|
||||
|
||||
void Trigraph::ExtractEdgesAndClear(vector<ConnectedEdge>* edges) {
|
||||
first_edge_.swap(vector<cmph_uint32>());
|
||||
vertex_degree_.swap(vector<cmph_uint8>());
|
||||
void TriGraph::ExtractEdgesAndClear(vector<ConnectedEdge>* edges) {
|
||||
vector<cmph_uint32>().swap(first_edge_);
|
||||
vector<cmph_uint8>().swap(vertex_degree_);
|
||||
nedges_ = 0;
|
||||
edges->swap(edges_);
|
||||
}
|
||||
@@ -1,5 +1,10 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../src/cmph_types.h"
|
||||
|
||||
class TriGraph {
|
||||
struct Edge {
|
||||
Edge() { }
|
||||
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
|
||||
cmph_uint32 vertices[3];
|
||||
};
|
||||
@@ -9,13 +14,13 @@ class TriGraph {
|
||||
};
|
||||
|
||||
TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices);
|
||||
void AddEdge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
|
||||
void AddEdge(const Edge& edge);
|
||||
void RemoveEdge(cmph_uint32 current_edge);
|
||||
void ExtractEdgesAndClear(vector<ConnectedEdge>* edges);
|
||||
void ExtractEdgesAndClear(std::vector<ConnectedEdge>* edges);
|
||||
|
||||
private:
|
||||
cmph_uint32 nedges_;
|
||||
vector<ConnectedEdge> edges_;
|
||||
vector<cmph_uint32> first_edge_;
|
||||
vector<cmph_uint8> vertex_degree_;
|
||||
std::vector<ConnectedEdge> edges_;
|
||||
std::vector<cmph_uint32> first_edge_;
|
||||
std::vector<cmph_uint8> vertex_degree_;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user