From b3842c69e827e915dd888ab8ffcc33e3c7bde46c Mon Sep 17 00:00:00 2001 From: Davi Reis Date: Mon, 19 Mar 2012 03:10:42 -0300 Subject: [PATCH] New bit code works, need to cleanup logging. --- cxxmph/mph_bits.h | 22 +++++++++++++----- cxxmph/mph_bits_test.cc | 12 ++++++++-- cxxmph/mph_index.cc | 50 +++++++++++++--------------------------- cxxmph/mph_index.h | 19 +++++++-------- cxxmph/mph_index_test.cc | 2 ++ 5 files changed, 52 insertions(+), 53 deletions(-) diff --git a/cxxmph/mph_bits.h b/cxxmph/mph_bits.h index 6577b9d..36782f2 100644 --- a/cxxmph/mph_bits.h +++ b/cxxmph/mph_bits.h @@ -17,16 +17,23 @@ namespace cxxmph { class dynamic_2bitset { public: - dynamic_2bitset() : fill_(false) {} + dynamic_2bitset() : size_(0), fill_(false) {} dynamic_2bitset(uint32_t size, bool fill = false) : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) { + if (data_.size()) fprintf(stderr, "creating %p size %d\n", &data_[0], data_.size()); + } + ~dynamic_2bitset() { + if (data_.size()) fprintf(stderr, "Deleting %p size %d\n", &data_[0], data_.size()); } const uint8_t operator[](uint32_t i) const { return get(i); } - uint8_t get(uint32_t i) const { + const uint8_t get(uint32_t i) const { + assert(i < size()); + assert((i >> 2) < data_.size()); return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3); } uint8_t set(uint32_t i, uint8_t v) { + assert((i >> 2) < data_.size()); data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3]; data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); assert(v <= 3); @@ -39,17 +46,18 @@ class dynamic_2bitset { void swap(dynamic_2bitset& other) { std::swap(other.size_, size_); std::swap(other.fill_, fill_); - std::swap(other.data_, data_); + other.data_.swap(data_); } - void clear() { data_.clear(); } + void clear() { data_.clear(); size_ = 0; } uint32_t size() const { return size_; } static const uint8_t vmask[]; - private: + const std::vector& data() const { return data_; } +// private: uint32_t size_; bool fill_; std::vector data_; - uint8_t ones() { return std::numeric_limits::max(); } + const uint8_t ones() { return std::numeric_limits::max(); } }; static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) { @@ -67,6 +75,8 @@ static uint32_t nextpoweroftwo(uint32_t k) { // Interesting bit tricks that might end up here: // http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord +// Fast a % (k*2^t) +// http://www.azillionmonkeys.com/qed/adiv.html } // namespace cxxmph diff --git a/cxxmph/mph_bits_test.cc b/cxxmph/mph_bits_test.cc index e6a764d..c1680e3 100644 --- a/cxxmph/mph_bits_test.cc +++ b/cxxmph/mph_bits_test.cc @@ -40,10 +40,18 @@ int main(int argc, char** argv) { if (size_corner1.size() != 1) exit(-1); dynamic_2bitset size_corner2(2); if (size_corner2.size() != 2) exit(-1); - (dynamic_2bitset(4)).swap(size_corner2); + (dynamic_2bitset(4, true)).swap(size_corner2); if (size_corner2.size() != 4) exit(-1); + for (int i = 0; i < size_corner2.size(); ++i) { + if (size_corner2[i] != 3) exit(-1); + } + size_corner2.clear(); + if (size_corner2.size() != 0) exit(-1); - + dynamic_2bitset empty; + empty.clear(); + dynamic_2bitset large(1000, true); + empty.swap(large); } diff --git a/cxxmph/mph_index.cc b/cxxmph/mph_index.cc index b1c0176..e1b24a8 100644 --- a/cxxmph/mph_index.cc +++ b/cxxmph/mph_index.cc @@ -44,9 +44,6 @@ MPHIndex::~MPHIndex() { } void MPHIndex::clear() { - if (!deserialized_) delete [] g_; - g_ = NULL; - g_size_ = 0; if (!deserialized_) delete [] ranktable_; ranktable_ = NULL; ranktable_size_ = 0; @@ -113,13 +110,9 @@ void MPHIndex::Assigning( const vector& edges, const vector& queue) { uint32_t current_edge = 0; vector marked_vertices(n_ + 1); + dynamic_2bitset().swap(g_); // Initialize vector of half nibbles with all bits set. - g_size_ = static_cast(ceil(n_/4.0)); - if (!deserialized_) delete [] g_; - g_ = NULL; - uint8_t* g = new uint8_t[g_size_]; - memset(g, std::numeric_limits::max(), g_size_); - assert(g[g_size_ - 1] == 255); + dynamic_2bitset g(n_, true /* set bits to 1 */); uint32_t nedges = m_; // for legibility for (int i = nedges - 1; i + 1 >= 1; --i) { @@ -133,35 +126,35 @@ void MPHIndex::Assigning( */ if (!marked_vertices[e[0]]) { if (!marked_vertices[e[1]]) { - set_2bit_value(g, e[1], kUnassigned); + g.set(e[1], kUnassigned); marked_vertices[e[1]] = true; } if (!marked_vertices[e[2]]) { - set_2bit_value(g, e[2], kUnassigned); + g.set(e[2], kUnassigned); assert(marked_vertices.size() > e[2]); marked_vertices[e[2]] = true; } - set_2bit_value(g, e[0], (6 - (get_2bit_value(g, e[1]) + get_2bit_value(g, e[2]))) % 3); + g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3); marked_vertices[e[0]] = true; } else if (!marked_vertices[e[1]]) { if (!marked_vertices[e[2]]) { - set_2bit_value(g, e[2], kUnassigned); + g.set(e[2], kUnassigned); marked_vertices[e[2]] = true; } - set_2bit_value(g, e[1], (7 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[2]))) % 3); + g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3); marked_vertices[e[1]] = true; } else { - set_2bit_value(g, e[2], (8 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[1]))) % 3); + g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3); marked_vertices[e[2]] = true; } /* cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> " - << get_2bit_value(g, e[0]) << " " - << get_2bit_value(g, e[1]) << " " - << get_2bit_value(g, e[2]) << " " << endl; + << static_cast(g[e[0]]) << " " + << static_cast(g[e[1]]) << " " + << static_cast(g[e[2]]) << " " << endl; */ } - g_ = g; + g_.swap(g); } void MPHIndex::Ranking() { @@ -194,19 +187,17 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const { uint32_t beg_idx_v = index << b_; uint32_t beg_idx_b = beg_idx_v >> 2; uint32_t end_idx_b = vertex >> 2; - while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_[beg_idx_b++]]; + while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]]; beg_idx_v = beg_idx_b << 2; // cerr << "beg_idx_v: " << beg_idx_v << endl; // cerr << "base rank: " << base_rank << endl; - /* cerr << "G: "; for (unsigned int i = 0; i < n_; ++i) { - cerr << get_2bit_value(g_, i) << " "; + cerr << static_cast(g_[i]) << " "; } cerr << endl; - */ while (beg_idx_v < vertex) { - if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank; + if (g_[beg_idx_v] != kUnassigned) ++base_rank; ++beg_idx_v; } // cerr << "Base rank: " << base_rank << endl; @@ -214,21 +205,12 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const { } uint32_t MPHIndex::serialize_bytes_needed() const { - return sizeof(MPHIndex) + g_size_ + ranktable_size_*sizeof(uint32_t); + return 0; } void MPHIndex::serialize(char* memory) const { - memcpy(memory, this, sizeof(MPHIndex)); - memcpy(memory + sizeof(MPHIndex), g_, g_size_); - memcpy(memory + sizeof(MPHIndex) + g_size_, - ranktable_, ranktable_size_*sizeof(uint32_t)); } bool MPHIndex::deserialize(const char* serialized_memory) { - memcpy(this, serialized_memory, sizeof(MPHIndex)); - g_ = reinterpret_cast(serialized_memory + sizeof(MPHIndex)); - ranktable_ = reinterpret_cast( - serialized_memory + sizeof(MPHIndex) + g_size_); - deserialized_ = true; return true; } diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h index 72ea3ef..c872f6b 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -45,7 +45,6 @@ class MPHIndex { public: MPHIndex(double c = 1.23, uint8_t b = 7) : c_(c), b_(b), m_(0), n_(0), k_(0), r_(1), - g_(NULL), g_size_(0), ranktable_(NULL), ranktable_size_(0), deserialized_(false) { } ~MPHIndex(); @@ -103,10 +102,8 @@ class MPHIndex { uint32_t r_; uint32_t nest_displacement_[3]; // derived from r_ - // The array containing the minimal perfect hash function graph. Do not use - // c++ vector to make mmap based backing easier. - const uint8_t* g_; - uint32_t g_size_; + // The array containing the minimal perfect hash function graph. + dynamic_2bitset g_; uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints // The table used for the rank step of the minimal perfect hash function const uint32_t* ranktable_; @@ -156,6 +153,7 @@ bool MPHIndex::Reset( } if (iterations == 0) return false; Assigning(edges, queue); + fprintf(stderr, "Assignment finished\n"); std::vector().swap(edges); Ranking(); deserialized_ = false; @@ -192,7 +190,7 @@ void MPHIndex::hash_vector(const Key& key, uint32_t* h) const { template uint32_t MPHIndex::perfect_hash(const Key& key) const { uint32_t h[4]; - if (!g_size_) return 0; + if (!g_.size()) return 0; SeededHashFcn().hash64(key, hash_seed_[0], h); h[0] = (h[0] % r_) + nest_displacement_[0]; h[1] = (h[1] % r_) + nest_displacement_[1]; @@ -200,12 +198,11 @@ uint32_t MPHIndex::perfect_hash(const Key& key) const { // h[0] = (h[0] & (r_-1)) + nest_displacement_[0]; // h[1] = (h[1] & (r_-1)) + nest_displacement_[1]; // h[2] = (h[2] & (r_-1)) + nest_displacement_[2]; - // cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl; - assert((h[0] >> 2) > 2) > 2) ::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast::value_type>(i)); + /* char* serialized = new char[mph_index.serialize_bytes_needed()]; mph_index.serialize(serialized); SimpleMPHIndex other_mph_index; other_mph_index.deserialize(serialized); + */ }