Added half nibble code.
This commit is contained in:
parent
724e716d67
commit
385ce27a10
@ -1,4 +1,4 @@
|
|||||||
bin_PROGRAMS = cmph_hash_map_test mphtable_test
|
bin_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test
|
||||||
lib_LTLIBRARIES = libcxxmph.la
|
lib_LTLIBRARIES = libcxxmph.la
|
||||||
|
|
||||||
libcxxmph_la_SOURCES = stringpiece.h MurmurHash2.h randomly_seeded_hash.h trigragh.h trigraph.cc mphtable.h mphtable.cc
|
libcxxmph_la_SOURCES = stringpiece.h MurmurHash2.h randomly_seeded_hash.h trigragh.h trigraph.cc mphtable.h mphtable.cc
|
||||||
@ -9,3 +9,6 @@ cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc
|
|||||||
|
|
||||||
mphtable_test_LDADD = libcxxmph.la
|
mphtable_test_LDADD = libcxxmph.la
|
||||||
mphtable_test_SOURCES = mphtable_test.cc
|
mphtable_test_SOURCES = mphtable_test.cc
|
||||||
|
|
||||||
|
trigraph_test_LDADD = libcxxmph.la
|
||||||
|
trigraph_test_SOURCES = trigraph_test.cc
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
#ifndef __CXXMPH_MURMUR_HASH2__
|
||||||
|
#define __CXXMPH_MURMUR_HASH2__
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// MurmurHash2, by Austin Appleby
|
// MurmurHash2, by Austin Appleby
|
||||||
|
|
||||||
@ -12,6 +15,8 @@
|
|||||||
// 2. It will not produce the same results on little-endian and big-endian
|
// 2. It will not produce the same results on little-endian and big-endian
|
||||||
// machines.
|
// machines.
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
|
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
|
||||||
{
|
{
|
||||||
// 'm' and 'r' are mixing constants generated offline.
|
// 'm' and 'r' are mixing constants generated offline.
|
||||||
@ -62,3 +67,7 @@ unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
|
|||||||
|
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // __CXXMPH_MURMUR_HASH2__
|
||||||
|
@ -1,49 +1,58 @@
|
|||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
#include "mphtable.h"
|
#include "mphtable.h"
|
||||||
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
namespace cxxmph {
|
namespace {
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
static const cmph_uint8 kUnassigned = 3;
|
||||||
template <class ForwardIterator>
|
// table used for looking up the number of assigned vertices to a 8-bit integer
|
||||||
bool MPHTable<Key, HashFcn>::Reset(ForwardIterator begin, ForwardIterator end) {
|
static cmph_uint8 kBdzLookupTable[] =
|
||||||
TableBuilderState<ForwardIterator> st;
|
{
|
||||||
m_ = end - begin;
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3));
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
if (r_ % 2) == 0) r_ += 1;
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
n_ = 3*r_;
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
k_ = 1U << b_;
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||||
|
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
|
||||||
|
};
|
||||||
|
|
||||||
int iterations = 1000;
|
static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||||
while (1) {
|
void set_2bit_value(vector<cmph_uint8> *d, cmph_uint8 i, cmph_uint8 v) {
|
||||||
for (int i = 0; i < 3; ++i) hash_function_[i] = hasher();
|
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
|
||||||
vector<Edge> edges;
|
}
|
||||||
vector<cmph_uint32> queue;
|
cmph_uint8 get_2bit_value(const vector<cmph_uint8>& d, cmph_uint8 i) {
|
||||||
if (Mapping(begin, end, &edges, &queue)) break;
|
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
|
||||||
else --iterations;
|
|
||||||
if (iterations == 0) break;
|
|
||||||
}
|
|
||||||
if (iterations == 0) return false;
|
|
||||||
vector<Edge>& edges;
|
|
||||||
graph->ExtractEdgesAndClear(&edges);
|
|
||||||
Assigning(queue, edges);
|
|
||||||
vector<cmph_uint32>().swap(edges);
|
|
||||||
Ranking();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
} // anonymous namespace
|
||||||
bool MPHTable<Key, HashFcn>::GenerateQueue(
|
|
||||||
|
namespace cxxmph {
|
||||||
|
|
||||||
|
bool MPHTable::GenerateQueue(
|
||||||
TriGraph* graph, vector<cmph_uint32>* queue_output) {
|
TriGraph* graph, vector<cmph_uint32>* queue_output) {
|
||||||
cmph_uint32 queue_head = 0, queue_tail = 0;
|
cmph_uint32 queue_head = 0, queue_tail = 0;
|
||||||
cmph_uint32 nedges = n_;
|
cmph_uint32 nedges = m_;
|
||||||
cmph_uint32 nvertices = m_;
|
cmph_uint32 nvertices = n_;
|
||||||
// Relies on vector<bool> using 1 bit per element
|
// Relies on vector<bool> using 1 bit per element
|
||||||
vector<bool> marked_edge((nedges >> 3) + 1, false);
|
vector<bool> marked_edge((nedges >> 3) + 1, false);
|
||||||
Queue queue(nvertices, 0);
|
vector<cmph_uint32> queue(nvertices, 0);
|
||||||
for (int i = 0; i < nedges; ++i) {
|
for (cmph_uint32 i = 0; i < nedges; ++i) {
|
||||||
const TriGraph::Edge& e = graph->edges()[i];
|
const TriGraph::Edge& e = graph->edges()[i];
|
||||||
if (graph->vertex_degree()[e[0]] == 1 ||
|
if (graph->vertex_degree()[e[0]] == 1 ||
|
||||||
graph->vertex_degree()[e[1]] == 1 ||
|
graph->vertex_degree()[e[1]] == 1 ||
|
||||||
@ -74,102 +83,56 @@ bool MPHTable<Key, HashFcn>::GenerateQueue(
|
|||||||
return cycles == 0;
|
return cycles == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
void MPHTable::Assigning(
|
||||||
template <class ForwardIterator>
|
const vector<TriGraph::Edge>& edges, const vector<cmph_uint32>& queue) {
|
||||||
bool MPHTable<Key, HashFcn>::Mapping(
|
|
||||||
ForwardIterator begin, ForwardIterator end,
|
|
||||||
vector<Edge>* edges, vector<cmph_uint32> queue) {
|
|
||||||
int cycles = 0;
|
|
||||||
TriGraph graph(m, n);
|
|
||||||
for (ForwardIterator it = begin; it != end; ++it) {
|
|
||||||
cmph_uint32 h[3];
|
|
||||||
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
|
|
||||||
cmph_uint32 v0 = h[0] % r_;
|
|
||||||
cmph_uint32 v1 = h[1] % r_ + r_;
|
|
||||||
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
|
|
||||||
graph.AddEdge(Edge(v0, v1, v2));
|
|
||||||
}
|
|
||||||
if (GenerateQueue(&graph, queue)) {
|
|
||||||
graph.ExtractEdgesAndClear(edges);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
|
||||||
void MPHTable<Key, HashFcn>::Assigning(
|
|
||||||
const vector<Edge>& edges, const vector<cmph_uint32>& queue) {
|
|
||||||
cmph_uint32 nedges = n_;
|
cmph_uint32 nedges = n_;
|
||||||
cmph_uint32 current_edge = 0;
|
cmph_uint32 current_edge = 0;
|
||||||
vector<bool> marked_vertices(nedges + 1);
|
vector<bool> marked_vertices(nedges + 1);
|
||||||
// TODO(davi) use half nibbles instead
|
// Initialize vector of half nibbles with all bits set.
|
||||||
// vector<cmph_uint8> g(static_cast<cmph_uint32>(ceil(nedges / 4.0)),
|
vector<cmph_uint8>(nedges, std::numeric_limits<cmph_uint8>::max()).swap(g_);
|
||||||
// std::numerical_limits<cmph_uint8>::max());
|
|
||||||
static const cmph_uint8 kUnassigned = 3;
|
|
||||||
vector<cmph_uint8>(nedges, kUnassigned).swap(g_);
|
|
||||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||||
current_edge = queue[i];
|
current_edge = queue[i];
|
||||||
const TriGraph::Edge& e = edges[current_edge];
|
const TriGraph::Edge& e = edges[current_edge];
|
||||||
if (!marked_vertices[e[0]]) {
|
if (!marked_vertices[e[0]]) {
|
||||||
if (!marked_vertices[e[1]]) {
|
if (!marked_vertices[e[1]]) {
|
||||||
g_[e[1]] = kUnassigned;
|
set_2bit_value(&g_, e[1], kUnassigned);
|
||||||
marked_vertices[e[1]] = true;
|
marked_vertices[e[1]] = true;
|
||||||
}
|
}
|
||||||
if (!marked_vertices[e[2]]) {
|
if (!marked_vertices[e[2]]) {
|
||||||
g_[e[2]] = kUnassigned;
|
set_2bit_value(&g_, e[2], kUnassigned);
|
||||||
marked_vertices[e[2]] = true;
|
marked_vertices[e[2]] = true;
|
||||||
}
|
}
|
||||||
g_[e[0]] = (6 - g_[e[1]] + g_[e2]) % 3;
|
set_2bit_value(&g_, e[0], (6 - (get_2bit_value(g_, e[1]) + get_2bit_value(g_, e[2]))) % 3);
|
||||||
marked_vertices[e[0]] = true;
|
marked_vertices[e[0]] = true;
|
||||||
} else if (!marked_vertices[e[1]])) {
|
} else if (!marked_vertices[e[1]]) {
|
||||||
if (!marked_vertices[e[2]])) {
|
if (!marked_vertices[e[2]]) {
|
||||||
g_[e[2]] = kUnassigned;
|
set_2bit_value(&g_, e[2], kUnassigned);
|
||||||
marked_vertices[e[2]] = true;
|
marked_vertices[e[2]] = true;
|
||||||
}
|
}
|
||||||
g_[e[1]] = 7 - (g_[e[0]] + g_[e[2]]) % 3;
|
set_2bit_value(&g_, e[1], (7 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[2]))) % 3);
|
||||||
marked_vertices[e[1]] = true;
|
marked_vertices[e[1]] = true;
|
||||||
} else {
|
} else {
|
||||||
g_[e[2]] = (8 - g_[e[0]] + g_[e[1]]) % 3;
|
set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3);
|
||||||
marked_vertices[e[2]] = true;
|
marked_vertices[e[2]] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// table used for looking up the number of assigned vertices to a 8-bit integer
|
void MPHTable::Ranking() {
|
||||||
static cmph_uint8 kBdzLookupTable[] =
|
cmph_uint32 nbytes_total = static_cast<cmph_uint32>(ceil(n_ / 4.0));
|
||||||
{
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
|
||||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
|
||||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
|
||||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
|
||||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
|
||||||
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
|
||||||
void MPHTable<Key, HashFcn>::Ranking() {
|
|
||||||
cmph_uint32 nbytes_total = static_cast<cmph_uint32>(ceil(st->n / 4.0));
|
|
||||||
cmph_uint32 size = k_ >> 2U;
|
cmph_uint32 size = k_ >> 2U;
|
||||||
ranktablesize = static_cast<cmph_uint32>(ceil(n_ / static_cast<double>(k_)));
|
cmph_uint32 ranktablesize = static_cast<cmph_uint32>(
|
||||||
// TODO(davi) Change swap of member classes for resize + memset to avoid fragmentation
|
ceil(n_ / static_cast<double>(k_)));
|
||||||
|
// TODO(davi) Change swap of member classes for resize + memset to avoid
|
||||||
|
// fragmentation
|
||||||
vector<cmph_uint32> (ranktablesize).swap(ranktable_);;
|
vector<cmph_uint32> (ranktablesize).swap(ranktable_);;
|
||||||
cmph_uint32 offset = 0;
|
cmph_uint32 offset = 0;
|
||||||
cmph_uint32 count = 0;
|
cmph_uint32 count = 0;
|
||||||
cmph_uint32 i = 0;
|
cmph_uint32 i = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
if (i == ranktable.size()) break;
|
if (i == ranktable_.size()) break;
|
||||||
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total;
|
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total;
|
||||||
for (j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
for (cmph_uint32 j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
||||||
ranktable_[i] = count;
|
ranktable_[i] = count;
|
||||||
offset += nbytes;
|
offset += nbytes;
|
||||||
nbytes_total -= size;
|
nbytes_total -= size;
|
||||||
@ -177,36 +140,32 @@ void MPHTable<Key, HashFcn>::Ranking() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
cmph_uint32 MPHTable::Search(const key_type& key) const {
|
||||||
cmph_uint32 MPHTable<Key, HashFcn>::Search(const key_type& key) const {
|
|
||||||
cmph_uint32 vertex;
|
|
||||||
cmph_uint32 h[3];
|
cmph_uint32 h[3];
|
||||||
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
|
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
|
||||||
h[0] = h[0] % st->r;
|
h[0] = h[0] % r_;
|
||||||
h[1] = h[1] % st->r + st->r;
|
h[1] = h[1] % r_ + r_;
|
||||||
h[2] = h[2] % st->r + (st->r << 1);
|
h[2] = h[2] % r_ + (r_ << 1);
|
||||||
cmph_uint32 vertex = h[(h[g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3];
|
cmph_uint32 vertex = h[(g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3];
|
||||||
return Rank(st->b, st->ranktable, vertex);
|
return Rank(vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const {
|
||||||
cmph_uint32 MPHTable<Key, HashFcn>::Rank(cmph_uint32 vertex) const {
|
|
||||||
cmph_uint32 index = vertex >> b_;
|
cmph_uint32 index = vertex >> b_;
|
||||||
cmph_uint32 base_rank = ranktable_[index];
|
cmph_uint32 base_rank = ranktable_[index];
|
||||||
cmph_uint32 beg_idx_v = index << b;
|
cmph_uint32 beg_idx_v = index << b_;
|
||||||
cmph_uint32 beg_idx_b = index >> 2
|
cmph_uint32 beg_idx_b = index >> 2;
|
||||||
cmph_uint32 end_idx_b = index >> 2
|
cmph_uint32 end_idx_b = index >> 2;
|
||||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
||||||
beg_idx_v = beg_idx_b << 2;
|
beg_idx_v = beg_idx_b << 2;
|
||||||
while (beg_idx_v < vertex) {
|
while (beg_idx_v < vertex) {
|
||||||
if (g_[beg_idx_v) != kUnassigned) ++base_rank;
|
if (g_[beg_idx_v] != kUnassigned) ++base_rank;
|
||||||
++beg_idx_v;
|
++beg_idx_v;
|
||||||
}
|
}
|
||||||
return base_rank;
|
return base_rank;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Key, class HashFcn>
|
cmph_uint32 MPHTable::index(const key_type& key) const {
|
||||||
cmph_uint32 MPHTable<Key, HashFcn>::index(const key_type& key) const {
|
|
||||||
return Search(key);
|
return Search(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,21 +3,29 @@
|
|||||||
|
|
||||||
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
#include "randomly_seeded_hash.h"
|
#include "randomly_seeded_hash.h"
|
||||||
#include "stringpiece.h"
|
#include "stringpiece.h"
|
||||||
#include "trigraph.h"
|
#include "trigraph.h"
|
||||||
|
|
||||||
namespace cxxmph {
|
namespace cxxmph {
|
||||||
|
|
||||||
template <class Key, class NewRandomlySeededHashFcn = RandomlySeededMurmur2>
|
|
||||||
class MPHTable {
|
class MPHTable {
|
||||||
public:
|
public:
|
||||||
typedef Key key_type;
|
// This class could be a template for both key type and hash function, but we
|
||||||
typedef NewRandomlySeededHashFcn hasher;
|
// chose to go with simplicity.
|
||||||
|
typedef StringPiece key_type;
|
||||||
|
typedef RandomlySeededHashFunction<Murmur2StringPiece> hasher_type;
|
||||||
|
|
||||||
MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
|
MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
|
||||||
~MPHTable();
|
~MPHTable() {}
|
||||||
|
|
||||||
template <class ForwardIterator>
|
template <class ForwardIterator>
|
||||||
bool Reset(ForwardIterator begin, ForwardIterator end);
|
bool Reset(ForwardIterator begin, ForwardIterator end);
|
||||||
@ -26,21 +34,23 @@ class MPHTable {
|
|||||||
private:
|
private:
|
||||||
template <class ForwardIterator>
|
template <class ForwardIterator>
|
||||||
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
||||||
vector<Edge>* edges, vector<cmph_uint32> queue);
|
std::vector<TriGraph::Edge>* edges,
|
||||||
bool GenerateQueue(TriGraph* graph, vector<cmph_uint32>* queue);
|
std::vector<cmph_uint32>* queue);
|
||||||
void Assigning(TriGraph* graph_builder, Queue* queue);
|
bool GenerateQueue(TriGraph* graph, std::vector<cmph_uint32>* queue);
|
||||||
void Ranking(TriGraph* graph_builder, Queue* queue);
|
void Assigning(const std::vector<TriGraph::Edge>& edges,
|
||||||
cmph_uint32 Search(const StringPiece& key);
|
const std::vector<cmph_uint32>& queue);
|
||||||
cmph_uint32 Rank(const StringPiece& key);
|
void Ranking();
|
||||||
|
cmph_uint32 Search(const key_type& key) const;
|
||||||
|
cmph_uint32 Rank(cmph_uint32 vertex) const;
|
||||||
|
|
||||||
// Algorithm parameters
|
// Algorithm parameters
|
||||||
cmph_uint8 b_; // Number of bits of the kth index in the ranktable
|
|
||||||
double c_; // Number of bits per key (? is it right)
|
double c_; // Number of bits per key (? is it right)
|
||||||
|
cmph_uint8 b_; // Number of bits of the kth index in the ranktable
|
||||||
|
|
||||||
// Values used during generation
|
// Values used during generation
|
||||||
cmph_uint32 m_; // edges count
|
cmph_uint32 m_; // edges count
|
||||||
cmph_uint32 n_; // vertex count
|
cmph_uint32 n_; // vertex count
|
||||||
cmph_uint32 k_ // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
cmph_uint32 k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
||||||
|
|
||||||
// Values used during search
|
// Values used during search
|
||||||
|
|
||||||
@ -52,10 +62,59 @@ class MPHTable {
|
|||||||
std::vector<cmph_uint32> ranktable_;
|
std::vector<cmph_uint32> ranktable_;
|
||||||
// The selected hash function triplet for finding the edges in the minimal
|
// The selected hash function triplet for finding the edges in the minimal
|
||||||
// perfect hash function graph.
|
// perfect hash function graph.
|
||||||
hasher hash_function_[3];
|
hasher_type hash_function_[3];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Template method needs to go in the header file.
|
||||||
|
template <class ForwardIterator>
|
||||||
|
bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||||
|
m_ = end - begin;
|
||||||
|
r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3));
|
||||||
|
if ((r_ % 2) == 0) r_ += 1;
|
||||||
|
n_ = 3*r_;
|
||||||
|
k_ = 1U << b_;
|
||||||
|
|
||||||
|
cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
|
||||||
|
|
||||||
|
int iterations = 1000;
|
||||||
|
std::vector<TriGraph::Edge> edges;
|
||||||
|
std::vector<cmph_uint32> queue;
|
||||||
|
while (1) {
|
||||||
|
cerr << "Iterations missing: " << iterations << endl;
|
||||||
|
for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type();
|
||||||
|
if (Mapping(begin, end, &edges, &queue)) break;
|
||||||
|
else --iterations;
|
||||||
|
if (iterations == 0) break;
|
||||||
|
}
|
||||||
|
if (iterations == 0) return false;
|
||||||
|
Assigning(edges, queue);
|
||||||
|
std::vector<TriGraph::Edge>().swap(edges);
|
||||||
|
Ranking();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class ForwardIterator>
|
||||||
|
bool MPHTable::Mapping(
|
||||||
|
ForwardIterator begin, ForwardIterator end,
|
||||||
|
std::vector<TriGraph::Edge>* edges, std::vector<cmph_uint32>* queue) {
|
||||||
|
TriGraph graph(n_, m_);
|
||||||
|
for (ForwardIterator it = begin; it != end; ++it) {
|
||||||
|
cmph_uint32 h[3];
|
||||||
|
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
|
||||||
|
cmph_uint32 v0 = h[0] % r_;
|
||||||
|
cmph_uint32 v1 = h[1] % r_ + r_;
|
||||||
|
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
|
||||||
|
cerr << "Key: " << *it << " vertex " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
||||||
|
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
||||||
|
}
|
||||||
|
if (GenerateQueue(&graph, queue)) {
|
||||||
|
graph.ExtractEdgesAndClear(edges);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace cxxmph
|
} // namespace cxxmph
|
||||||
|
|
||||||
#define // __CXXMPH_MPHTABLE_H__
|
#endif // __CXXMPH_MPHTABLE_H__
|
||||||
|
@ -1,22 +1,30 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "mphtable.h"
|
#include "mphtable.h"
|
||||||
|
|
||||||
|
using std::string;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using cxxmph::MPHTable;
|
using cxxmph::MPHTable;
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
vector<int> keys;
|
vector<string> keys;
|
||||||
keys.push_back(10);
|
keys.push_back("davi");
|
||||||
keys.push_back(4);
|
keys.push_back("paulo");
|
||||||
keys.push_back(3);
|
keys.push_back("joao");
|
||||||
|
keys.push_back("maria");
|
||||||
|
keys.push_back("bruno");
|
||||||
|
|
||||||
MPHTable<int> mphtable;
|
MPHTable mphtable;
|
||||||
assert(mphtable.Reset(keys.begin(), keys.end()));
|
assert(mphtable.Reset(keys.begin(), keys.end()));
|
||||||
vector<int> ids;
|
vector<int> ids;
|
||||||
for (int i = 0; i < keys.size(); ++i) ids.push_back(mphtable.index(keys[i]));
|
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
|
||||||
|
ids.push_back(mphtable.index(keys[i]));
|
||||||
|
cerr << " " << *(ids.end() - 1);
|
||||||
|
}
|
||||||
|
cerr << endl;
|
||||||
sort(ids.begin(), ids.end());
|
sort(ids.begin(), ids.end());
|
||||||
for (int i = 0; i < ids.size(); ++i) assert(ids[i] == i);
|
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,17 +8,35 @@
|
|||||||
|
|
||||||
#include "../src/cmph_types.h"
|
#include "../src/cmph_types.h"
|
||||||
#include "MurmurHash2.h"
|
#include "MurmurHash2.h"
|
||||||
|
#include "stringpiece.h"
|
||||||
|
|
||||||
namespace cxxmph {
|
namespace cxxmph {
|
||||||
|
|
||||||
struct RandomlySeededMurmur2 {
|
template <class HashFun>
|
||||||
|
struct RandomlySeededHashFunction { };
|
||||||
|
|
||||||
|
class Murmur2StringPiece { };
|
||||||
|
class Murmur2Pod { };
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct RandomlySeededHashFunction<Murmur2StringPiece> {
|
||||||
RandomlySeededHashFunction() : seed(random()) { }
|
RandomlySeededHashFunction() : seed(random()) { }
|
||||||
cmph_uint32 operator()(const StringPiece& key) {
|
cmph_uint32 operator()(const StringPiece& key) const {
|
||||||
return MurmurHash2(key.data(), key.length(), seed);
|
return MurmurHash2(key.data(), key.length(), seed);
|
||||||
}
|
}
|
||||||
cmph_uint32 seed;
|
cmph_uint32 seed;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct RandomlySeededHashFunction<Murmur2Pod> {
|
||||||
|
RandomlySeededHashFunction() : seed(random()) { }
|
||||||
|
template<class Key>
|
||||||
|
cmph_uint32 operator()(const Key& key) const {
|
||||||
|
return MurmurHash2(&key, sizeof(key), seed);
|
||||||
|
}
|
||||||
|
cmph_uint32 seed;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace cxxmph
|
} // namespace cxxmph
|
||||||
|
|
||||||
#endif // __CXXMPH_RANDOMLY_SEEDED_HASH__
|
#endif // __CXXMPH_RANDOMLY_SEEDED_HASH__
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
#include "trigraph.h"
|
#include "trigraph.h"
|
||||||
|
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -11,9 +14,10 @@ static const cmph_uint8 kInvalidEdge = std::numeric_limits<cmph_uint8>::max();
|
|||||||
|
|
||||||
namespace cxxmph {
|
namespace cxxmph {
|
||||||
|
|
||||||
TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices)
|
TriGraph::TriGraph(cmph_uint32 nvertices, cmph_uint32 nedges)
|
||||||
: nedges_(0),
|
: nedges_(0),
|
||||||
edges_(nedges),
|
edges_(nedges),
|
||||||
|
next_edge_(nedges),
|
||||||
first_edge_(nvertices, kInvalidEdge),
|
first_edge_(nvertices, kInvalidEdge),
|
||||||
vertex_degree_(nvertices, 0) { }
|
vertex_degree_(nvertices, 0) { }
|
||||||
|
|
||||||
@ -25,7 +29,13 @@ void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
|
|||||||
edges->swap(edges_);
|
edges->swap(edges_);
|
||||||
}
|
}
|
||||||
void TriGraph::AddEdge(const Edge& edge) {
|
void TriGraph::AddEdge(const Edge& edge) {
|
||||||
edges_[nedges_] = edge;
|
edges_[nedges_] = edge;
|
||||||
|
assert(first_edge_.size() > edge[0]);
|
||||||
|
assert(first_edge_.size() > edge[1]);
|
||||||
|
assert(first_edge_.size() > edge[0]);
|
||||||
|
assert(first_edge_.size() > edge[1]);
|
||||||
|
assert(first_edge_.size() > edge[2]);
|
||||||
|
assert(next_edge_.size() > nedges_);
|
||||||
next_edge_[nedges_] = Edge(
|
next_edge_[nedges_] = Edge(
|
||||||
first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
|
first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
|
||||||
first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
|
first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
|
||||||
@ -36,7 +46,7 @@ void TriGraph::AddEdge(const Edge& edge) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void TriGraph::RemoveEdge(cmph_uint32 current_edge) {
|
void TriGraph::RemoveEdge(cmph_uint32 current_edge) {
|
||||||
cmph_uint32 vertex, edge1, edge2;
|
cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl;
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
cmph_uint32 vertex = edges_[current_edge][i];
|
cmph_uint32 vertex = edges_[current_edge][i];
|
||||||
cmph_uint32 edge1 = first_edge_[vertex];
|
cmph_uint32 edge1 = first_edge_[vertex];
|
||||||
|
@ -14,9 +14,14 @@
|
|||||||
namespace cxxmph {
|
namespace cxxmph {
|
||||||
|
|
||||||
class TriGraph {
|
class TriGraph {
|
||||||
|
public:
|
||||||
struct Edge {
|
struct Edge {
|
||||||
Edge() { }
|
Edge() { }
|
||||||
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
|
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2) {
|
||||||
|
vertices[0] = v0;
|
||||||
|
vertices[1] = v1;
|
||||||
|
vertices[2] = v2;
|
||||||
|
}
|
||||||
cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; }
|
cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; }
|
||||||
const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; }
|
const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; }
|
||||||
cmph_uint32 vertices[3];
|
cmph_uint32 vertices[3];
|
||||||
|
Loading…
Reference in New Issue
Block a user