// Patch summary. This is a unified diff touching cxxmph/mph_index.cc,
// cxxmph/mph_index.h and cxxmph/mph_index_test.cc.
//
// * ranktable_ storage: the member changes from `const uint32_t* ranktable_`
//   plus a separate `ranktable_size_` counter to a single std::vector member.
//   The constructor initializer list drops both old fields.
//   - clear() now empties ranktable_ and g_ by swapping each with a freshly
//     constructed local object (replacing the delete[] / NULL / size reset).
//   - Ranking() fills a local vector sized ceil(n_ / k_) and swaps it into
//     ranktable_ at the end, instead of new[] + memset + pointer assignment.
//   - Rank() returns 0 when ranktable_.empty(), replacing the size_ check.
// * New MPHIndex::swap(params, g, ranktable): resizes params to 12 and
//   exchanges slots 0..7 with, in order, c_ (multiplied by 1000 * 1000 and
//   held in a uint32_t), m_, n_, k_, square_ (via a uint32_t temporary) and
//   hash_seed_[0..2]; then swaps g_ and ranktable_ with the caller's objects.
//   The header comment calls it an experimental serialization building block
//   whose signature is expected to change.
// * Reset() in mph_index.h: the `cerr << "Iterations missing: "` line goes
//   from commented-out to active.
// * mph_index_test.cc: adds a round trip that swaps the index state out
//   (asserting mph_index.size() == 0) and back in (asserting it equals
//   ids.size()).
// * Three -/+ pairs (perfect_hash_size(), the "then probability of having"
//   comment, the for loop in Mapping()) show identical visible text on both
//   sides; presumably whitespace-only changes.
//
// NOTE(review): template argument lists look stripped in this copy of the
//   patch (e.g. `std::vector empty_ranktable`, `static_cast(ceil(...))`), and
//   the original line breaks are collapsed. Regenerate from the repository
//   before trying to apply it.
// NOTE(review): `uint32_t rounded_c = c_ * 1000 * 1000;` converts by
//   truncation despite the variable name, so c_ may not survive a swap round
//   trip exactly. Confirm whether explicit rounding was intended.
// NOTE(review): swap() resizes params to 12 but only reads/writes indices
//   0..7; presumably the remaining slots are reserved. Confirm.
// NOTE(review): swap() does not exchange b_ or r_, although Rank() reads b_
//   and Mapping() reads r_. Confirm that a deserialized index gets matching
//   values some other way.
// NOTE(review): the re-enabled cerr line in Reset() looks like leftover debug
//   output in a header template. Confirm it is intended.
// NOTE(review): the last loop added to the test re-asserts on `ids` without
//   any visible recomputation after the second swap, so within this hunk it
//   does not exercise the restored index.
diff --git a/cxxmph/mph_index.cc b/cxxmph/mph_index.cc index f5be3dd..bb1176a 100644 --- a/cxxmph/mph_index.cc +++ b/cxxmph/mph_index.cc @@ -43,10 +43,10 @@ MPHIndex::~MPHIndex() { } void MPHIndex::clear() { - delete [] ranktable_; - ranktable_ = NULL; - ranktable_size_ = 0; - // TODO(davi) implement me + std::vector empty_ranktable; + ranktable_.swap(empty_ranktable); + dynamic_2bitset empty_g; + g_.swap(empty_g); } bool MPHIndex::GenerateQueue( @@ -159,17 +159,14 @@ void MPHIndex::Assigning( void MPHIndex::Ranking() { uint32_t nbytes_total = static_cast(ceil(n_ / 4.0)); uint32_t size = k_ >> 2U; - ranktable_size_ = static_cast( + uint32_t ranktable_size = static_cast( ceil(n_ / static_cast(k_))); - delete [] ranktable_; - ranktable_ = NULL; - uint32_t* ranktable = new uint32_t[ranktable_size_]; - memset(ranktable, 0, ranktable_size_*sizeof(uint32_t)); + vector ranktable(ranktable_size); uint32_t offset = 0; uint32_t count = 0; uint32_t i = 1; while (1) { - if (i == ranktable_size_) break; + if (i == ranktable.size()) break; uint32_t nbytes = size < nbytes_total ? 
size : nbytes_total; for (uint32_t j = 0; j < nbytes; ++j) { count += kBdzLookupIndex[g_.data()[offset + j]]; @@ -179,11 +176,11 @@ void MPHIndex::Ranking() { nbytes_total -= size; ++i; } - ranktable_ = ranktable; + ranktable_.swap(ranktable); } uint32_t MPHIndex::Rank(uint32_t vertex) const { - if (!ranktable_size_) return 0; + if (ranktable_.empty()) return 0; uint32_t index = vertex >> b_; uint32_t base_rank = ranktable_[index]; uint32_t beg_idx_v = index << b_; @@ -211,4 +208,22 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const { return base_rank; } +void MPHIndex::swap(std::vector& params, dynamic_2bitset& g, std::vector& ranktable) { + params.resize(12); + uint32_t rounded_c = c_ * 1000 * 1000; + std::swap(params[0], rounded_c); + c_ = static_cast(rounded_c) / 1000 / 1000; + std::swap(params[1], m_); + std::swap(params[2], n_); + std::swap(params[3], k_); + uint32_t uint32_square = static_cast(square_); + std::swap(params[4], uint32_square); + square_ = uint32_square; + std::swap(params[5], hash_seed_[0]); + std::swap(params[6], hash_seed_[1]); + std::swap(params[7], hash_seed_[2]); + g.swap(g_); + ranktable.swap(ranktable_); +} + } // namespace cxxmph diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h index d83f388..d6ff371 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -45,8 +45,7 @@ namespace cxxmph { class MPHIndex { public: MPHIndex(bool square = false, double c = 1.23, uint8_t b = 7) : - c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true), - ranktable_(NULL), ranktable_size_(0) { + c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true) { nest_displacement_[0] = 0; nest_displacement_[1] = r_; nest_displacement_[2] = (r_ << 1); @@ -63,7 +62,7 @@ class MPHIndex { void clear(); // Advanced users functions. Please avoid unless you know what you are doing. 
- uint32_t perfect_hash_size() const { return n_; } + uint32_t perfect_hash_size() const { return n_; } template // must agree with Reset uint32_t perfect_hash(const Key& x) const; // way faster than the minimal template // must agree with Reset @@ -72,6 +71,11 @@ class MPHIndex { template // must agree with Reset uint32_t minimal_perfect_hash(const Key& x) const; + // Experimental api to use as a serialization building block. + // Since this signature exposes some implementation details, expect it to + // change. + void swap(std::vector& params, dynamic_2bitset& g, std::vector& ranktable); + private: template bool Mapping(ForwardIterator begin, ForwardIterator end, @@ -85,7 +89,7 @@ class MPHIndex { // Algorithm parameters // Perfect hash function density. If this was a 2graph, - // then probability of having an acyclic graph would be + // then probability of having an acyclic graph would be // sqrt(1-(2/c)^2). See section 3 for details. // http://www.it-c.dk/people/pagh/papers/simpleperf.pdf double c_; @@ -107,8 +111,7 @@ class MPHIndex { dynamic_2bitset g_; uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints // The table used for the rank step of the minimal perfect hash function - const uint32_t* ranktable_; - uint32_t ranktable_size_; + std::vector ranktable_; // The selected hash seed triplet for finding the edges in the minimal // perfect hash function graph. 
uint32_t hash_seed_[3]; @@ -142,7 +145,7 @@ bool MPHIndex::Reset( std::vector edges; std::vector queue; while (1) { - // cerr << "Iterations missing: " << iterations << endl; + cerr << "Iterations missing: " << iterations << endl; for (int i = 0; i < 3; ++i) hash_seed_[i] = random(); if (Mapping(begin, end, &edges, &queue)) break; else --iterations; @@ -160,7 +163,7 @@ bool MPHIndex::Mapping( ForwardIterator begin, ForwardIterator end, std::vector* edges, std::vector* queue) { TriGraph graph(n_, m_); - for (ForwardIterator it = begin; it != end; ++it) { + for (ForwardIterator it = begin; it != end; ++it) { h128 h = SeededHashFcn().hash128(*it, hash_seed_[0]); // for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]); uint32_t v0 = h[0] % r_; diff --git a/cxxmph/mph_index_test.cc b/cxxmph/mph_index_test.cc index d0ff418..7a4ad73 100644 --- a/cxxmph/mph_index_test.cc +++ b/cxxmph/mph_index_test.cc @@ -34,6 +34,16 @@ int main(int argc, char** argv) { sort(ids.begin(), ids.end()); for (vector::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast::value_type>(i)); + // Test serialization + vector params; + dynamic_2bitset g; + vector ranktable; + mph_index.swap(params, g, ranktable); + assert(mph_index.size() == 0); + mph_index.swap(params, g, ranktable); + assert(mph_index.size() == ids.size()); + for (vector::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast::value_type>(i)); + FlexibleMPHIndex>::hash_function> square_empty; auto id = square_empty.index(1); FlexibleMPHIndex>::hash_function> unordered_empty;