Moved to C arrays to allow mmap'ing.
This commit is contained in:
parent
a61882d722
commit
37a57c18e8
@ -112,6 +112,7 @@ MPH_MAP_METHOD_DECL(void_type, rehash)() {
|
|||||||
for (const_iterator it = values_.begin(), end = values_.end();
|
for (const_iterator it = values_.begin(), end = values_.end();
|
||||||
it != end; ++it) {
|
it != end; ++it) {
|
||||||
size_type id = table_.index(it->first);
|
size_type id = table_.index(it->first);
|
||||||
|
assert(id < new_values.size());
|
||||||
new_values[id] = *it;
|
new_values[id] = *it;
|
||||||
}
|
}
|
||||||
values_.swap(new_values);
|
values_.swap(new_values);
|
||||||
|
@ -39,9 +39,20 @@ namespace cxxmph {
|
|||||||
|
|
||||||
const uint8_t MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
const uint8_t MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||||
|
|
||||||
void MPHTable::clear() {
|
MPHTable::~MPHTable() {
|
||||||
// TODO(davi) impolement me
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MPHTable::clear() {
|
||||||
|
delete [] g_;
|
||||||
|
g_ = NULL;
|
||||||
|
g_size_ = 0;
|
||||||
|
delete [] ranktable_;
|
||||||
|
ranktable_ = NULL;
|
||||||
|
ranktable_size_ = 0;
|
||||||
|
// TODO(davi) implement me
|
||||||
|
}
|
||||||
|
|
||||||
bool MPHTable::GenerateQueue(
|
bool MPHTable::GenerateQueue(
|
||||||
TriGraph* graph, vector<uint32_t>* queue_output) {
|
TriGraph* graph, vector<uint32_t>* queue_output) {
|
||||||
uint32_t queue_head = 0, queue_tail = 0;
|
uint32_t queue_head = 0, queue_tail = 0;
|
||||||
@ -61,12 +72,14 @@ bool MPHTable::GenerateQueue(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
for (unsigned int i = 0; i < marked_edge.size(); ++i) {
|
for (unsigned int i = 0; i < marked_edge.size(); ++i) {
|
||||||
cerr << "vertex with degree " << static_cast<uint32_t>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
|
cerr << "vertex with degree " << static_cast<uint32_t>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
|
||||||
}
|
}
|
||||||
for (unsigned int i = 0; i < queue.size(); ++i) {
|
for (unsigned int i = 0; i < queue.size(); ++i) {
|
||||||
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
// At this point queue head is the number of edges touching at least one
|
// At this point queue head is the number of edges touching at least one
|
||||||
// vertex of degree 1.
|
// vertex of degree 1.
|
||||||
// cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
|
// cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
|
||||||
@ -86,9 +99,11 @@ bool MPHTable::GenerateQueue(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
for (unsigned int i = 0; i < queue.size(); ++i) {
|
for (unsigned int i = 0; i < queue.size(); ++i) {
|
||||||
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
int cycles = queue_head - nedges;
|
int cycles = queue_head - nedges;
|
||||||
if (cycles == 0) queue.swap(*queue_output);
|
if (cycles == 0) queue.swap(*queue_output);
|
||||||
return cycles == 0;
|
return cycles == 0;
|
||||||
@ -99,60 +114,67 @@ void MPHTable::Assigning(
|
|||||||
uint32_t current_edge = 0;
|
uint32_t current_edge = 0;
|
||||||
vector<bool> marked_vertices(n_ + 1);
|
vector<bool> marked_vertices(n_ + 1);
|
||||||
// Initialize vector of half nibbles with all bits set.
|
// Initialize vector of half nibbles with all bits set.
|
||||||
uint32_t sizeg = static_cast<uint32_t>(ceil(n_/4.0));
|
g_size_ = static_cast<uint32_t>(ceil(n_/4.0));
|
||||||
vector<uint8_t>(sizeg, std::numeric_limits<uint8_t>::max()).swap(g_);
|
delete [] g_;
|
||||||
|
g_ = new uint8_t[g_size_];
|
||||||
|
memset(g_, std::numeric_limits<uint8_t>::max(), g_size_);
|
||||||
|
assert(g_[g_size_ - 1] == 255);
|
||||||
|
|
||||||
uint32_t nedges = m_; // for legibility
|
uint32_t nedges = m_; // for legibility
|
||||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||||
current_edge = queue[i];
|
current_edge = queue[i];
|
||||||
const TriGraph::Edge& e = edges[current_edge];
|
const TriGraph::Edge& e = edges[current_edge];
|
||||||
|
/*
|
||||||
cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||||
<< get_2bit_value(g_, e[0]) << " "
|
<< get_2bit_value(g_, e[0]) << " "
|
||||||
<< get_2bit_value(g_, e[1]) << " "
|
<< get_2bit_value(g_, e[1]) << " "
|
||||||
<< get_2bit_value(g_, e[2]) << " edge " << current_edge << endl;
|
<< get_2bit_value(g_, e[2]) << " edge " << current_edge << endl;
|
||||||
|
*/
|
||||||
if (!marked_vertices[e[0]]) {
|
if (!marked_vertices[e[0]]) {
|
||||||
if (!marked_vertices[e[1]]) {
|
if (!marked_vertices[e[1]]) {
|
||||||
set_2bit_value(&g_, e[1], kUnassigned);
|
set_2bit_value(g_, e[1], kUnassigned);
|
||||||
marked_vertices[e[1]] = true;
|
marked_vertices[e[1]] = true;
|
||||||
}
|
}
|
||||||
if (!marked_vertices[e[2]]) {
|
if (!marked_vertices[e[2]]) {
|
||||||
set_2bit_value(&g_, e[2], kUnassigned);
|
set_2bit_value(g_, e[2], kUnassigned);
|
||||||
assert(marked_vertices.size() > e[2]);
|
assert(marked_vertices.size() > e[2]);
|
||||||
marked_vertices[e[2]] = true;
|
marked_vertices[e[2]] = true;
|
||||||
}
|
}
|
||||||
set_2bit_value(&g_, e[0], (6 - (get_2bit_value(g_, e[1]) + get_2bit_value(g_, e[2]))) % 3);
|
set_2bit_value(g_, e[0], (6 - (get_2bit_value(g_, e[1]) + get_2bit_value(g_, e[2]))) % 3);
|
||||||
marked_vertices[e[0]] = true;
|
marked_vertices[e[0]] = true;
|
||||||
} else if (!marked_vertices[e[1]]) {
|
} else if (!marked_vertices[e[1]]) {
|
||||||
if (!marked_vertices[e[2]]) {
|
if (!marked_vertices[e[2]]) {
|
||||||
set_2bit_value(&g_, e[2], kUnassigned);
|
set_2bit_value(g_, e[2], kUnassigned);
|
||||||
marked_vertices[e[2]] = true;
|
marked_vertices[e[2]] = true;
|
||||||
}
|
}
|
||||||
set_2bit_value(&g_, e[1], (7 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[2]))) % 3);
|
set_2bit_value(g_, e[1], (7 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[2]))) % 3);
|
||||||
marked_vertices[e[1]] = true;
|
marked_vertices[e[1]] = true;
|
||||||
} else {
|
} else {
|
||||||
set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3);
|
set_2bit_value(g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3);
|
||||||
marked_vertices[e[2]] = true;
|
marked_vertices[e[2]] = true;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||||
<< get_2bit_value(g_, e[0]) << " "
|
<< get_2bit_value(g_, e[0]) << " "
|
||||||
<< get_2bit_value(g_, e[1]) << " "
|
<< get_2bit_value(g_, e[1]) << " "
|
||||||
<< get_2bit_value(g_, e[2]) << " " << endl;
|
<< get_2bit_value(g_, e[2]) << " " << endl;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MPHTable::Ranking() {
|
void MPHTable::Ranking() {
|
||||||
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
|
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
|
||||||
uint32_t size = k_ >> 2U;
|
uint32_t size = k_ >> 2U;
|
||||||
uint32_t ranktablesize = static_cast<uint32_t>(
|
ranktable_size_ = static_cast<uint32_t>(
|
||||||
ceil(n_ / static_cast<double>(k_)));
|
ceil(n_ / static_cast<double>(k_)));
|
||||||
// TODO(davi) Change swap of member classes for resize + memset to avoid
|
delete [] ranktable_;
|
||||||
// fragmentation
|
ranktable_ = new uint32_t[ranktable_size_];
|
||||||
vector<uint32_t> (ranktablesize).swap(ranktable_);;
|
memset(ranktable_, 0, ranktable_size_*sizeof(uint32_t));
|
||||||
uint32_t offset = 0;
|
uint32_t offset = 0;
|
||||||
uint32_t count = 0;
|
uint32_t count = 0;
|
||||||
uint32_t i = 1;
|
uint32_t i = 1;
|
||||||
while (1) {
|
while (1) {
|
||||||
if (i == ranktable_.size()) break;
|
if (i == ranktable_size_) break;
|
||||||
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
|
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
|
||||||
for (uint32_t j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
for (uint32_t j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
||||||
ranktable_[i] = count;
|
ranktable_[i] = count;
|
||||||
@ -170,14 +192,15 @@ uint32_t MPHTable::Rank(uint32_t vertex) const {
|
|||||||
uint32_t end_idx_b = vertex >> 2;
|
uint32_t end_idx_b = vertex >> 2;
|
||||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
||||||
beg_idx_v = beg_idx_b << 2;
|
beg_idx_v = beg_idx_b << 2;
|
||||||
cerr << "beg_idx_v: " << beg_idx_v << endl;
|
// cerr << "beg_idx_v: " << beg_idx_v << endl;
|
||||||
cerr << "base rank: " << base_rank << endl;
|
// cerr << "base rank: " << base_rank << endl;
|
||||||
|
/*
|
||||||
cerr << "G: ";
|
cerr << "G: ";
|
||||||
for (unsigned int i = 0; i < n_; ++i) {
|
for (unsigned int i = 0; i < n_; ++i) {
|
||||||
cerr << get_2bit_value(g_, i) << " ";
|
cerr << get_2bit_value(g_, i) << " ";
|
||||||
}
|
}
|
||||||
cerr << endl;
|
cerr << endl;
|
||||||
|
*/
|
||||||
while (beg_idx_v < vertex) {
|
while (beg_idx_v < vertex) {
|
||||||
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank;
|
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank;
|
||||||
++beg_idx_v;
|
++beg_idx_v;
|
||||||
|
@ -23,8 +23,9 @@ namespace cxxmph {
|
|||||||
class MPHTable {
|
class MPHTable {
|
||||||
public:
|
public:
|
||||||
MPHTable(double c = 1.23, uint8_t b = 7) :
|
MPHTable(double c = 1.23, uint8_t b = 7) :
|
||||||
c_(c), b_(b), m_(0), n_(0), k_(0), r_(0) { }
|
c_(c), b_(b), m_(0), n_(0), k_(0), r_(0),
|
||||||
~MPHTable() {}
|
g_(NULL), g_size_(0), ranktable_(NULL), ranktable_size_(0) { }
|
||||||
|
~MPHTable();
|
||||||
|
|
||||||
template <class SeededHashFcn, class ForwardIterator>
|
template <class SeededHashFcn, class ForwardIterator>
|
||||||
bool Reset(ForwardIterator begin, ForwardIterator end);
|
bool Reset(ForwardIterator begin, ForwardIterator end);
|
||||||
@ -57,20 +58,23 @@ class MPHTable {
|
|||||||
|
|
||||||
// Partition vertex count, derived from c parameter.
|
// Partition vertex count, derived from c parameter.
|
||||||
uint32_t r_;
|
uint32_t r_;
|
||||||
// The array containing the minimal perfect hash function graph.
|
// The array containing the minimal perfect hash function graph. Do not use
|
||||||
std::vector<uint8_t> g_;
|
// c++ vector to make mmap based backing easier.
|
||||||
|
uint8_t* g_;
|
||||||
|
uint32_t g_size_;
|
||||||
// The table used for the rank step of the minimal perfect hash function
|
// The table used for the rank step of the minimal perfect hash function
|
||||||
std::vector<uint32_t> ranktable_;
|
uint32_t* ranktable_;
|
||||||
|
uint32_t ranktable_size_;
|
||||||
// The selected hash seed triplet for finding the edges in the minimal
|
// The selected hash seed triplet for finding the edges in the minimal
|
||||||
// perfect hash function graph.
|
// perfect hash function graph.
|
||||||
uint32_t hash_seed_[3];
|
uint32_t hash_seed_[3];
|
||||||
|
|
||||||
static const uint8_t valuemask[];
|
static const uint8_t valuemask[];
|
||||||
static void set_2bit_value(std::vector<uint8_t> *d, uint32_t i, uint8_t v) {
|
static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) {
|
||||||
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
|
d[(i >> 2)] &= ((v << ((i & 3) << 1)) | valuemask[i & 3]);
|
||||||
}
|
}
|
||||||
static uint32_t get_2bit_value(const std::vector<uint8_t>& d, uint32_t i) {
|
static uint32_t get_2bit_value(const uint8_t* d, uint32_t i) {
|
||||||
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
|
return (d[(i >> 2)] >> (((i & 3) << 1)) & 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -85,13 +89,13 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
|||||||
n_ = 3*r_;
|
n_ = 3*r_;
|
||||||
k_ = 1U << b_;
|
k_ = 1U << b_;
|
||||||
|
|
||||||
cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
|
// cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
|
||||||
|
|
||||||
int iterations = 10;
|
int iterations = 10;
|
||||||
std::vector<TriGraph::Edge> edges;
|
std::vector<TriGraph::Edge> edges;
|
||||||
std::vector<uint32_t> queue;
|
std::vector<uint32_t> queue;
|
||||||
while (1) {
|
while (1) {
|
||||||
cerr << "Iterations missing: " << iterations << endl;
|
// cerr << "Iterations missing: " << iterations << endl;
|
||||||
for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
|
for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
|
||||||
// for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i;
|
// for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i;
|
||||||
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
|
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
|
||||||
@ -116,7 +120,7 @@ bool MPHTable::Mapping(
|
|||||||
uint32_t v0 = h[0] % r_;
|
uint32_t v0 = h[0] % r_;
|
||||||
uint32_t v1 = h[1] % r_ + r_;
|
uint32_t v1 = h[1] % r_ + r_;
|
||||||
uint32_t v2 = h[2] % r_ + (r_ << 1);
|
uint32_t v2 = h[2] % r_ + (r_ << 1);
|
||||||
cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
// cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
||||||
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
||||||
}
|
}
|
||||||
if (GenerateQueue(&graph, queue)) {
|
if (GenerateQueue(&graph, queue)) {
|
||||||
@ -133,13 +137,13 @@ uint32_t MPHTable::index(const Key& key) const {
|
|||||||
h[0] = h[0] % r_;
|
h[0] = h[0] % r_;
|
||||||
h[1] = h[1] % r_ + r_;
|
h[1] = h[1] % r_ + r_;
|
||||||
h[2] = h[2] % r_ + (r_ << 1);
|
h[2] = h[2] % r_ + (r_ << 1);
|
||||||
assert(g_.size());
|
assert(g_size_);
|
||||||
cerr << "g_.size() " << g_.size() << " h0 >> 2 " << (h[0] >> 2) << endl;
|
// cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl;
|
||||||
assert((h[0] >> 2) <g_.size());
|
assert((h[0] >> 2) <g_size_);
|
||||||
assert((h[1] >> 2) <g_.size());
|
assert((h[1] >> 2) <g_size_);
|
||||||
assert((h[2] >> 2) <g_.size());
|
assert((h[2] >> 2) <g_size_);
|
||||||
uint32_t vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
|
uint32_t vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
|
||||||
cerr << "Search found vertex " << vertex << endl;
|
// cerr << "Search found vertex " << vertex << endl;
|
||||||
return Rank(vertex);
|
return Rank(vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user