diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index 538f708..398db3e 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -112,6 +112,7 @@ MPH_MAP_METHOD_DECL(void_type, rehash)() { for (const_iterator it = values_.begin(), end = values_.end(); it != end; ++it) { size_type id = table_.index(it->first); + assert(id < new_values.size()); new_values[id] = *it; } values_.swap(new_values); diff --git a/cxxmph/mph_table.cc b/cxxmph/mph_table.cc index 2f92498..0fa0393 100644 --- a/cxxmph/mph_table.cc +++ b/cxxmph/mph_table.cc @@ -39,9 +39,20 @@ namespace cxxmph { const uint8_t MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; -void MPHTable::clear() { - // TODO(davi) impolement me +MPHTable::~MPHTable() { + clear(); } + +void MPHTable::clear() { + delete [] g_; + g_ = NULL; + g_size_ = 0; + delete [] ranktable_; + ranktable_ = NULL; + ranktable_size_ = 0; + // TODO(davi) implement me +} + bool MPHTable::GenerateQueue( TriGraph* graph, vector* queue_output) { uint32_t queue_head = 0, queue_tail = 0; @@ -61,12 +72,14 @@ bool MPHTable::GenerateQueue( } } } + /* for (unsigned int i = 0; i < marked_edge.size(); ++i) { cerr << "vertex with degree " << static_cast(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl; } for (unsigned int i = 0; i < queue.size(); ++i) { cerr << "vertex " << i << " queued at " << queue[i] << endl; } + */ // At this point queue head is the number of edges touching at least one // vertex of degree 1. // cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl; @@ -86,9 +99,11 @@ bool MPHTable::GenerateQueue( } } } + /* for (unsigned int i = 0; i < queue.size(); ++i) { cerr << "vertex " << i << " queued at " << queue[i] << endl; } + */ int cycles = queue_head - nedges; if (cycles == 0) queue.swap(*queue_output); return cycles == 0; @@ -99,60 +114,67 @@ void MPHTable::Assigning( uint32_t current_edge = 0; vector marked_vertices(n_ + 1); // Initialize vector of half nibbles with all bits set. - uint32_t sizeg = static_cast(ceil(n_/4.0)); - vector(sizeg, std::numeric_limits::max()).swap(g_); + g_size_ = static_cast(ceil(n_/4.0)); + delete [] g_; + g_ = new uint8_t[g_size_]; + memset(g_, std::numeric_limits::max(), g_size_); + assert(g_[g_size_ - 1] == 255); uint32_t nedges = m_; // for legibility for (int i = nedges - 1; i + 1 >= 1; --i) { current_edge = queue[i]; const TriGraph::Edge& e = edges[current_edge]; + /* cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> " << get_2bit_value(g_, e[0]) << " " << get_2bit_value(g_, e[1]) << " " << get_2bit_value(g_, e[2]) << " edge " << current_edge << endl; + */ if (!marked_vertices[e[0]]) { if (!marked_vertices[e[1]]) { - set_2bit_value(&g_, e[1], kUnassigned); + set_2bit_value(g_, e[1], kUnassigned); marked_vertices[e[1]] = true; } if (!marked_vertices[e[2]]) { - set_2bit_value(&g_, e[2], kUnassigned); + set_2bit_value(g_, e[2], kUnassigned); assert(marked_vertices.size() > e[2]); marked_vertices[e[2]] = true; } - set_2bit_value(&g_, e[0], (6 - (get_2bit_value(g_, e[1]) + get_2bit_value(g_, e[2]))) % 3); + set_2bit_value(g_, e[0], (6 - (get_2bit_value(g_, e[1]) + get_2bit_value(g_, e[2]))) % 3); marked_vertices[e[0]] = true; } else if (!marked_vertices[e[1]]) { if (!marked_vertices[e[2]]) { - set_2bit_value(&g_, e[2], kUnassigned); + set_2bit_value(g_, e[2], kUnassigned); marked_vertices[e[2]] = true; } - set_2bit_value(&g_, e[1], (7 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[2]))) % 3); + set_2bit_value(g_, e[1], (7 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[2]))) % 3); marked_vertices[e[1]] = true; } else { - set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3); + set_2bit_value(g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3); marked_vertices[e[2]] = true; } + /* cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> " << get_2bit_value(g_, e[0]) << " " << get_2bit_value(g_, e[1]) << " " << get_2bit_value(g_, e[2]) << " " << endl; + */ } } void MPHTable::Ranking() { uint32_t nbytes_total = static_cast(ceil(n_ / 4.0)); uint32_t size = k_ >> 2U; - uint32_t ranktablesize = static_cast( + ranktable_size_ = static_cast( ceil(n_ / static_cast(k_))); - // TODO(davi) Change swap of member classes for resize + memset to avoid - // fragmentation - vector (ranktablesize).swap(ranktable_);; + delete [] ranktable_; + ranktable_ = new uint32_t[ranktable_size_]; + memset(ranktable_, 0, ranktable_size_*sizeof(uint32_t)); uint32_t offset = 0; uint32_t count = 0; uint32_t i = 1; while (1) { - if (i == ranktable_.size()) break; + if (i == ranktable_size_) break; uint32_t nbytes = size < nbytes_total ? size : nbytes_total; for (uint32_t j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]]; ranktable_[i] = count; @@ -170,14 +192,15 @@ uint32_t MPHTable::Rank(uint32_t vertex) const { uint32_t end_idx_b = vertex >> 2; while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]]; beg_idx_v = beg_idx_b << 2; - cerr << "beg_idx_v: " << beg_idx_v << endl; - cerr << "base rank: " << base_rank << endl; - + // cerr << "beg_idx_v: " << beg_idx_v << endl; + // cerr << "base rank: " << base_rank << endl; + /* cerr << "G: "; for (unsigned int i = 0; i < n_; ++i) { cerr << get_2bit_value(g_, i) << " "; } cerr << endl; + */ while (beg_idx_v < vertex) { if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank; ++beg_idx_v; diff --git a/cxxmph/mph_table.h b/cxxmph/mph_table.h index 99baaa3..34544fd 100644 --- a/cxxmph/mph_table.h +++ b/cxxmph/mph_table.h @@ -23,8 +23,9 @@ namespace cxxmph { class MPHTable { public: MPHTable(double c = 1.23, uint8_t b = 7) : - c_(c), b_(b), m_(0), n_(0), k_(0), r_(0) { } - ~MPHTable() {} + c_(c), b_(b), m_(0), n_(0), k_(0), r_(0), + g_(NULL), g_size_(0), ranktable_(NULL), ranktable_size_(0) { } + ~MPHTable(); template bool Reset(ForwardIterator begin, ForwardIterator end); @@ -57,20 +58,23 @@ class MPHTable { // Partition vertex count, derived from c parameter. uint32_t r_; - // The array containing the minimal perfect hash function graph. - std::vector g_; + // The array containing the minimal perfect hash function graph. Do not use + // c++ vector to make mmap based backing easier. + uint8_t* g_; + uint32_t g_size_; // The table used for the rank step of the minimal perfect hash function - std::vector ranktable_; + uint32_t* ranktable_; + uint32_t ranktable_size_; // The selected hash seed triplet for finding the edges in the minimal // perfect hash function graph. uint32_t hash_seed_[3]; static const uint8_t valuemask[]; - static void set_2bit_value(std::vector *d, uint32_t i, uint8_t v) { - (*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3]; + static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) { + d[(i >> 2)] &= ((v << ((i & 3) << 1)) | valuemask[i & 3]); } - static uint32_t get_2bit_value(const std::vector& d, uint32_t i) { - return (d[(i >> 2)] >> ((i & 3) << 1)) & 3; + static uint32_t get_2bit_value(const uint8_t* d, uint32_t i) { + return (d[(i >> 2)] >> (((i & 3) << 1)) & 3); } @@ -85,13 +89,13 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) { n_ = 3*r_; k_ = 1U << b_; - cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl; + // cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl; int iterations = 10; std::vector edges; std::vector queue; while (1) { - cerr << "Iterations missing: " << iterations << endl; + // cerr << "Iterations missing: " << iterations << endl; for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_; // for (int i = 0; i < 3; ++i) hash_seed_[i] = random() + i; if (Mapping(begin, end, &edges, &queue)) break; @@ -116,7 +120,7 @@ bool MPHTable::Mapping( uint32_t v0 = h[0] % r_; uint32_t v1 = h[1] % r_ + r_; uint32_t v2 = h[2] % r_ + (r_ << 1); - cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl; + // cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl; graph.AddEdge(TriGraph::Edge(v0, v1, v2)); } if (GenerateQueue(&graph, queue)) { @@ -133,13 +137,13 @@ uint32_t MPHTable::index(const Key& key) const { h[0] = h[0] % r_; h[1] = h[1] % r_ + r_; h[2] = h[2] % r_ + (r_ << 1); - assert(g_.size()); - cerr << "g_.size() " << g_.size() << " h0 >> 2 " << (h[0] >> 2) << endl; - assert((h[0] >> 2) > 2) > 2) > 2 " << (h[0] >> 2) << endl; + assert((h[0] >> 2) > 2) > 2)