Lots of work.

This commit is contained in:
Davi de Castro Reis
2010-10-05 11:51:17 -03:00
parent f04df98f91
commit bf0c5892d8
9 changed files with 378 additions and 110 deletions

View File

@@ -1,6 +1,8 @@
# Builds the libcxxmph libtool library and the cmph_hash_map_test program.
bin_PROGRAMS = cmph_hash_map_test
lib_LTLIBRARIES = libcxxmph.la
INCLUDES = -I../src/
# trigraph.h is the header that trigraph.cc and mphtable.h include.
libcxxmph_la_SOURCES = trigraph.h trigraph.cc
libcxxmph_la_LDFLAGS = -version-info 0:0:0
# NOTE(review): cmph_hash_map_test_LDADD is assigned twice below; confirm
# whether the test should link against both libraries or only the second.
cmph_hash_map_test_LDADD = ../src/libcmph.la
cmph_hash_map_test_LDADD = libcxxmph.la
cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc

View File

@@ -1,4 +1,4 @@
#include <hash_map>
#include <ext/hash_map>
#include <vector>
#include <utility> // for std::pair

View File

@@ -1,37 +1,105 @@
#include <limits>
#include <numerical_limits>  // NOTE(review): not a standard header; <limits> provides std::numeric_limits
// Compile-time population count of the low eight bits of `n`.
//
// The primary template tests the bit selected by `mask` and recurses with the
// mask shifted one position to the right; the `mask == 0` specialization ends
// the recursion. `bitcount<n>::value` is therefore the number of bits set in
// `n & 0xFF`.
template <int n, int mask = 1 << 7> struct bitcount {
  // The shift is parenthesized because, since C++11, an unparenthesized `>>`
  // inside a template argument list is read as two closing angle brackets.
  enum { value = ((n & mask) ? 1 : 0) + bitcount<n, (mask >> 1)>::value };
};
template <int n> struct bitcount<n, 0> { enum { value = 0 }; };
#include "mphtable.h"
template <int n, int current, int mask = 1 << 8> struct bitposition {
enum
using std::vector;
template <int index = 0, typename op> class CompileTimeByteTable {
public:
CompileTimeByteTable : current(op<index>::value) { }
int operator[] (int i) { return *(&current + i); }
private:
unsigned char current;
CompileTimeByteTable<next> next;
};
template <class Key, class HashFcn>
template <class ForwardIterator>
static CompileTimeByteTable<256, bitcount> BitcountTable;
void MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
TableBuilderState st;
st.c = 1.23;
st.b = 7;
st.m = end - begin;
st.r = static_cast<cmph_uint32>(ceil((st.c*st.m)/3));
if ((st.r % 2) == 0) st.r += 1;
st.n = 3*st.r;
st.k = 1U << st.b;
st.ranktablesize = static_cast<cmph_uint32>(
ceil(st.n / static_cast<double>(st.k)));
st.graph_builder = TriGraph(st.m, st.n); // giant copy
st.edges_queue.resize(st.m)
#define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
int iterations = 1000;
while (1) {
hasher hasher0 = HashFcn();
ok = Mapping(st.graph_builder, st.edges_queue);
if (ok) break;
else --iterations;
if (iterations == 0) break;
}
if (iterations == 0) return false;
vector<ConnectedEdge> graph;
st.graph_builder.ExtractEdgesAndClear(&graph);
Assigning(graph, st.edges_queue);
vector<cmph_uint32>().swap(st.edges_queue);
Ranking(graph);
}
// Computes the order in which the edges of `graph` can be peeled off.
//
// Every edge that touches a vertex of degree one is queued first. The queue
// is then drained: each dequeued edge is removed from the graph, and every
// endpoint that is left with degree one contributes its first_edge_ entry to
// the queue. An edge is queued at most once.
//
// Params:
//   nedges     number of edges in `graph`.
//   nvertices  number of vertices in `graph`; also the size given to `queue`.
//   graph      graph to peel; edges are removed from it.
//   queue      output; replaced by the edge indexes in removal order.
//
// Returns the number of queued edges minus `nedges`, i.e. zero exactly when
// every edge was queued.
//
// NOTE(review): TriGraph as declared in trigraph.h keeps its edge, first-edge
// and degree vectors private; this function needs accessors or friendship.
template <class Key, class HashFcn>
int MPHTable::GenerateQueue(
    cmph_uint32 nedges, cmph_uint32 nvertices,
    TriGraph* graph, Queue* queue) {
  cmph_uint32 queue_head = 0, queue_tail = 0;
  // One flag per edge. vector<bool> does its own bit packing, so the element
  // count is the number of edges, not the number of bytes.
  vector<bool> marked_edge(nedges, false);
  // swap() takes a non-const reference, so the temporary must be the object
  // that swap() is called on.
  Queue(nvertices, 0).swap(*queue);
  for (cmph_uint32 i = 0; i < nedges; ++i) {
    TriGraph::Edge e = graph->edges[i];
    if (graph->vertex_degree_[e.vertices[0]] == 1 ||
        graph->vertex_degree_[e.vertices[1]] == 1 ||
        graph->vertex_degree_[e.vertices[2]] == 1) {
      if (!marked_edge[i]) {
        (*queue)[queue_head++] = i;
        marked_edge[i] = true;
      }
    }
  }
  while (queue_tail != queue_head) {
    cmph_uint32 current_edge = (*queue)[queue_tail++];
    graph->RemoveEdge(current_edge);
    TriGraph::Edge e = graph->edges[current_edge];
    for (int i = 0; i < 3; ++i) {
      cmph_uint32 v = e.vertices[i];
      if (graph->vertex_degree_[v] == 1) {
        cmph_uint32 first_edge = graph->first_edge_[v];
        if (!marked_edge[first_edge]) {
          (*queue)[queue_head++] = first_edge;
          marked_edge[first_edge] = true;
        }
      }
    }
  }
  return static_cast<int>(queue_head - nedges);
}
// Largest value representable by `char`.
static const int kMaskStepSelectTable = std::numeric_limits<char>::max();
// Turns every key into a three-vertex edge and checks whether the resulting
// graph can be peeled completely.
//
// The graph is reset, one edge is added per key in [begin_, end_), and
// GenerateQueue() is run over the result. The i-th endpoint of an edge is
// taken from the i-th run of `bdz->r` consecutive vertex ids.
//
// Returns non-zero when GenerateQueue() reports zero, and zero otherwise.
template <class Key, class HashFcn>
int MPHTable::Mapping(TriGraph* graph, Queue* queue) {
  graph->Reset(m, n);
  for (ForwardIterator it = begin_; it != end_; ++it) {
    cmph_uint32 hash_values[3];
    // NOTE(review): hasher_ is called the same way for all three slots; unless
    // it is stateful the three values are equal -- confirm.
    for (int i = 0; i < 3; ++i) {
      hash_values[i] = hasher_(*it);
    }
    const cmph_uint32 v0 = hash_values[0] % bdz->r;
    const cmph_uint32 v1 = hash_values[1] % bdz->r + bdz->r;
    const cmph_uint32 v2 = hash_values[2] % bdz->r + (bdz->r << 1);
    // Edge is nested in TriGraph, so it must be qualified here.
    graph->AddEdge(TriGraph::Edge(v0, v1, v2));
  }
  // Argument order follows GenerateQueue(nedges, nvertices, graph, queue).
  const int cycles = GenerateQueue(bdz->m, bdz->n, graph, queue);
  return cycles == 0;
}
// Currently a no-op: the body is empty and neither argument is used.
void MPHTable::Assigning(TriGraph* graph, Queue* queue) {
}
// Currently a no-op: the body is empty and neither argument is used.
void MPHTable::Ranking(TriGraph* graph, Queue* queue) {
}
// Empty body; `key` is not used.
// NOTE(review): declared to return cmph_uint32 but contains no return
// statement, so the result of a call is undefined.
cmph_uint32 MPHTable::Search(const key_type& key) {
}
// Empty body; `key` is not used.
// NOTE(review): declared to return cmph_uint32 but contains no return
// statement, so the result of a call is undefined.
cmph_uint32 MPHTable::Rank(const key_type& key) {
}

View File

@@ -1,83 +1,44 @@
// Minimal perfect hash abstraction implementing the BDZ algorithm
#include <vector>
#include "trigraph.h"
// Table type parameterized on the key type and on a hash functor type.
// NOTE(review): two template headers follow one another here (and again
// before Reset, and the Queue typedef appears twice); they look like the
// before/after lines of a change rather than intentional code -- confirm
// which line of each pair is meant to stay.
template <class Key>
template <class Key, class NewRandomlySeededHashFcn = __gnu_cxx::hash<Key> >
class MPHTable {
public:
// Key type and hash functor type, re-exported from the template parameters.
typedef Key key_type;
typedef NewRandomlySeededHashFcn hasher;
MPHTable();
~MPHTable();
template <class Iterator>
template <class ForwardIterator>
// Takes the key range [begin, end) and reports a bool status.
// NOTE(review): presumably true means the table was built -- confirm.
bool Reset(ForwardIterator begin, ForwardIterator end);
// Returns a cmph_uint32 for key `x` without modifying the table.
cmph_uint32 index(const key_type& x) const;
private:
// Sequence of edge indexes.
typedef vector<cmph_uint32> Queue;
typedef std::vector<cmph_uint32> Queue;
// Scratch data used while building the table.
// NOTE(review): the Reset() implementation also assigns `b` and `r` on this
// struct, which are not declared here.
template<class ForwardIterator>
struct TableBuilderState {
ForwardIterator begin;
ForwardIterator end;
Queue edges_queue;
TriGraph graph_builder;
double c;
cmph_uint32 m;
cmph_uint32 n;
cmph_uint32 k;
cmph_uint32 ranktablesize;
};
// Fills `queue` with edge indexes of `graph` and returns an int derived from
// how many edges were queued.
int GenerateQueue(
cmph_uint32 nedges, cmph_uint32 nvertices,
TriGraph* graph, Queue* queue);
// NOTE(review): Mapping() is defined for this class but not declared here.
void Assigning(TriGraph* graph, Queue* queue);
void Ranking(TriGraph* graph, Queue* queue);
// NOTE(review): these take StringPiece while the public interface uses
// key_type -- confirm which is intended.
cmph_uint32 Search(const StringPiece& key);
cmph_uint32 Rank(const StringPiece& key);
// Generates three hash values for k in a single pass.
// NOTE(review): no return type is written on this declaration.
static hash_vector(cmph_uint32 seed, const char* k, cmph_uint32 keylen, cmph_uint32* hashes) ;
std::vector<ConnectedEdge> graph_;
};
// Produces the peeling order of the edges of `graph`.
//
// Edges incident to a degree-one vertex seed the queue. While the queue is
// not exhausted, the next edge is removed from the graph and each of its
// three vertices whose degree is now one enqueues its first_edge_ entry,
// unless that edge was enqueued before.
//
// Params:
//   nedges     number of edges in `graph`.
//   nvertices  number of vertices in `graph`; also the size given to `queue`.
//   graph      graph to peel; edges are removed from it.
//   queue      output; replaced by the edge indexes in removal order.
//
// Returns the number of enqueued edges minus `nedges`; the result is zero
// exactly when all edges were enqueued.
//
// NOTE(review): TriGraph as declared in trigraph.h keeps its edge, first-edge
// and degree vectors private; this function needs accessors or friendship.
int MPHTable::GenerateQueue(
    cmph_uint32 nedges, cmph_uint32 nvertices,
    TriGraph* graph, Queue* queue) {
  cmph_uint32 queue_head = 0, queue_tail = 0;
  // Indexed by edge id, so it needs one element per edge; vector<bool>
  // already stores one bit per element.
  vector<bool> marked_edge(nedges, false);
  // A temporary cannot be passed to swap(), so swap from the temporary.
  Queue(nvertices, 0).swap(*queue);
  for (cmph_uint32 i = 0; i < nedges; ++i) {
    TriGraph::Edge e = graph->edges[i];
    const bool has_leaf =
        graph->vertex_degree_[e.vertices[0]] == 1 ||
        graph->vertex_degree_[e.vertices[1]] == 1 ||
        graph->vertex_degree_[e.vertices[2]] == 1;
    if (has_leaf && !marked_edge[i]) {
      (*queue)[queue_head++] = i;
      marked_edge[i] = true;
    }
  }
  while (queue_tail != queue_head) {
    cmph_uint32 current_edge = (*queue)[queue_tail++];
    graph->RemoveEdge(current_edge);
    TriGraph::Edge e = graph->edges[current_edge];
    for (int i = 0; i < 3; ++i) {
      cmph_uint32 v = e.vertices[i];
      if (graph->vertex_degree_[v] != 1) continue;
      cmph_uint32 first_edge = graph->first_edge_[v];
      if (marked_edge[first_edge]) continue;
      (*queue)[queue_head++] = first_edge;
      marked_edge[first_edge] = true;
    }
  }
  return static_cast<int>(queue_head - nedges);
}
// Adds one edge per key to `graph` and checks whether the graph can be
// peeled completely.
//
// For every key in [begin, end) hash_vector() fills three hash values; the
// i-th value is reduced modulo `bdz->r` and offset into the i-th run of
// `bdz->r` vertex ids. GenerateQueue() is then run over the graph.
//
// Returns non-zero when GenerateQueue() reports zero, and zero otherwise.
int MPHTable::Mapping(TriGraph* graph, Queue* queue) {
  cmph_uint32 hl[3];
  graph->Reset(m, n);
  // Advance the iterator on every pass so that each key is visited once.
  for (ForwardIterator it = begin; it != end; ++it) {
    StringPiece key = *it;
    hash_vector(bdz->hl, key.data(), key.len(), hl);
    const cmph_uint32 h0 = hl[0] % bdz->r;
    const cmph_uint32 h1 = hl[1] % bdz->r + bdz->r;
    const cmph_uint32 h2 = hl[2] % bdz->r + (bdz->r << 1);
    AddEdge(graph, h0, h1, h2);
  }
  // Argument order follows GenerateQueue(nedges, nvertices, graph, queue).
  const int cycles = GenerateQueue(bdz->m, bdz->n, graph, queue);
  return cycles == 0;
}
void MPHTable::Assigning(TriGraph* graph, Queue* queue);
void MPHTable::Ranking(TriGraph* graph, Queue* queue);
cmph_uint32 MPHTable::Search(const StringPiece& key);
cmph_uint32 MPHTable::Rank(const StringPiece& key);

View File

@@ -1,18 +1,22 @@
#include <limits>
#include "trigraph.h"
using std::vector;
namespace {
static const cmph_uint8 kInvalidEdge = std::limits<cmph_uint8>::max;
static const cmph_uint8 kInvalidEdge = std::numeric_limits<cmph_uint8>::max();
}
// Creates a graph sized for `nedges` edges over `nvertices` vertices.
//
// edges_ gets `nedges` default-constructed elements, every first_edge_ entry
// starts as kInvalidEdge, every vertex degree starts at zero, and nedges_
// starts at zero.
TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices)
    : nedges_(0),
      edges_(nedges),
      first_edge_(nvertices, kInvalidEdge),
      vertex_degree_(nvertices, 0) { }
void Trigraph::ExtractEdgesAndClear(vector<ConnectedEdge>* edges) {
first_edge_.swap(vector<cmph_uint32>());
vertex_degree_.swap(vector<cmph_uint8>());
void TriGraph::ExtractEdgesAndClear(vector<ConnectedEdge>* edges) {
vector<cmph_uint32>().swap(first_edge_);
vector<cmph_uint8>().swap(vertex_degree_);
nedges_ = 0;
edges->swap(edges_);
}

View File

@@ -1,5 +1,10 @@
#include <vector>
#include "../src/cmph_types.h"
class TriGraph {
// An edge described by three vertex ids.
struct Edge {
// Leaves `vertices` uninitialized.
Edge() { }
// Declared only; the definition is not in this header.
// NOTE(review): presumably stores v0, v1, v2 into `vertices` -- confirm.
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
cmph_uint32 vertices[3];
};
@@ -9,13 +14,13 @@ class TriGraph {
};
TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices);
void AddEdge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
void AddEdge(const Edge& edge);
void RemoveEdge(cmph_uint32 current_edge);
void ExtractEdgesAndClear(vector<ConnectedEdge>* edges);
void ExtractEdgesAndClear(std::vector<ConnectedEdge>* edges);
private:
cmph_uint32 nedges_;
vector<ConnectedEdge> edges_;
vector<cmph_uint32> first_edge_;
vector<cmph_uint8> vertex_degree_;
std::vector<ConnectedEdge> edges_;
std::vector<cmph_uint32> first_edge_;
std::vector<cmph_uint8> vertex_degree_;
};