New bit code works; need to clean up logging.

This commit is contained in:
Davi Reis 2012-03-19 03:10:42 -03:00
parent 50ac0e2974
commit b3842c69e8
5 changed files with 52 additions and 53 deletions

View File

@ -17,16 +17,23 @@ namespace cxxmph {
class dynamic_2bitset { class dynamic_2bitset {
public: public:
dynamic_2bitset() : fill_(false) {} dynamic_2bitset() : size_(0), fill_(false) {}
dynamic_2bitset(uint32_t size, bool fill = false) dynamic_2bitset(uint32_t size, bool fill = false)
: size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) { : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {
if (data_.size()) fprintf(stderr, "creating %p size %d\n", &data_[0], data_.size());
}
~dynamic_2bitset() {
if (data_.size()) fprintf(stderr, "Deleting %p size %d\n", &data_[0], data_.size());
} }
const uint8_t operator[](uint32_t i) const { return get(i); } const uint8_t operator[](uint32_t i) const { return get(i); }
uint8_t get(uint32_t i) const { const uint8_t get(uint32_t i) const {
assert(i < size());
assert((i >> 2) < data_.size());
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3); return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
} }
uint8_t set(uint32_t i, uint8_t v) { uint8_t set(uint32_t i, uint8_t v) {
assert((i >> 2) < data_.size());
data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3]; data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
assert(v <= 3); assert(v <= 3);
@ -39,17 +46,18 @@ class dynamic_2bitset {
void swap(dynamic_2bitset& other) { void swap(dynamic_2bitset& other) {
std::swap(other.size_, size_); std::swap(other.size_, size_);
std::swap(other.fill_, fill_); std::swap(other.fill_, fill_);
std::swap(other.data_, data_); other.data_.swap(data_);
} }
void clear() { data_.clear(); } void clear() { data_.clear(); size_ = 0; }
uint32_t size() const { return size_; } uint32_t size() const { return size_; }
static const uint8_t vmask[]; static const uint8_t vmask[];
private: const std::vector<uint8_t>& data() const { return data_; }
// private:
uint32_t size_; uint32_t size_;
bool fill_; bool fill_;
std::vector<uint8_t> data_; std::vector<uint8_t> data_;
uint8_t ones() { return std::numeric_limits<uint8_t>::max(); } const uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
}; };
static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) { static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) {
@ -67,6 +75,8 @@ static uint32_t nextpoweroftwo(uint32_t k) {
// Interesting bit tricks that might end up here: // Interesting bit tricks that might end up here:
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord // http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
// Fast a % (k*2^t)
// http://www.azillionmonkeys.com/qed/adiv.html
} // namespace cxxmph } // namespace cxxmph

View File

@ -40,10 +40,18 @@ int main(int argc, char** argv) {
if (size_corner1.size() != 1) exit(-1); if (size_corner1.size() != 1) exit(-1);
dynamic_2bitset size_corner2(2); dynamic_2bitset size_corner2(2);
if (size_corner2.size() != 2) exit(-1); if (size_corner2.size() != 2) exit(-1);
(dynamic_2bitset(4)).swap(size_corner2); (dynamic_2bitset(4, true)).swap(size_corner2);
if (size_corner2.size() != 4) exit(-1); if (size_corner2.size() != 4) exit(-1);
for (int i = 0; i < size_corner2.size(); ++i) {
if (size_corner2[i] != 3) exit(-1);
}
size_corner2.clear();
if (size_corner2.size() != 0) exit(-1);
dynamic_2bitset empty;
empty.clear();
dynamic_2bitset large(1000, true);
empty.swap(large);
} }

View File

@ -44,9 +44,6 @@ MPHIndex::~MPHIndex() {
} }
void MPHIndex::clear() { void MPHIndex::clear() {
if (!deserialized_) delete [] g_;
g_ = NULL;
g_size_ = 0;
if (!deserialized_) delete [] ranktable_; if (!deserialized_) delete [] ranktable_;
ranktable_ = NULL; ranktable_ = NULL;
ranktable_size_ = 0; ranktable_size_ = 0;
@ -113,13 +110,9 @@ void MPHIndex::Assigning(
const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) { const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
uint32_t current_edge = 0; uint32_t current_edge = 0;
vector<bool> marked_vertices(n_ + 1); vector<bool> marked_vertices(n_ + 1);
dynamic_2bitset().swap(g_);
// Initialize vector of half nibbles with all bits set. // Initialize vector of half nibbles with all bits set.
g_size_ = static_cast<uint32_t>(ceil(n_/4.0)); dynamic_2bitset g(n_, true /* set bits to 1 */);
if (!deserialized_) delete [] g_;
g_ = NULL;
uint8_t* g = new uint8_t[g_size_];
memset(g, std::numeric_limits<uint8_t>::max(), g_size_);
assert(g[g_size_ - 1] == 255);
uint32_t nedges = m_; // for legibility uint32_t nedges = m_; // for legibility
for (int i = nedges - 1; i + 1 >= 1; --i) { for (int i = nedges - 1; i + 1 >= 1; --i) {
@ -133,35 +126,35 @@ void MPHIndex::Assigning(
*/ */
if (!marked_vertices[e[0]]) { if (!marked_vertices[e[0]]) {
if (!marked_vertices[e[1]]) { if (!marked_vertices[e[1]]) {
set_2bit_value(g, e[1], kUnassigned); g.set(e[1], kUnassigned);
marked_vertices[e[1]] = true; marked_vertices[e[1]] = true;
} }
if (!marked_vertices[e[2]]) { if (!marked_vertices[e[2]]) {
set_2bit_value(g, e[2], kUnassigned); g.set(e[2], kUnassigned);
assert(marked_vertices.size() > e[2]); assert(marked_vertices.size() > e[2]);
marked_vertices[e[2]] = true; marked_vertices[e[2]] = true;
} }
set_2bit_value(g, e[0], (6 - (get_2bit_value(g, e[1]) + get_2bit_value(g, e[2]))) % 3); g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3);
marked_vertices[e[0]] = true; marked_vertices[e[0]] = true;
} else if (!marked_vertices[e[1]]) { } else if (!marked_vertices[e[1]]) {
if (!marked_vertices[e[2]]) { if (!marked_vertices[e[2]]) {
set_2bit_value(g, e[2], kUnassigned); g.set(e[2], kUnassigned);
marked_vertices[e[2]] = true; marked_vertices[e[2]] = true;
} }
set_2bit_value(g, e[1], (7 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[2]))) % 3); g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3);
marked_vertices[e[1]] = true; marked_vertices[e[1]] = true;
} else { } else {
set_2bit_value(g, e[2], (8 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[1]))) % 3); g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3);
marked_vertices[e[2]] = true; marked_vertices[e[2]] = true;
} }
/* /*
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> " cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
<< get_2bit_value(g, e[0]) << " " << static_cast<uint32_t>(g[e[0]]) << " "
<< get_2bit_value(g, e[1]) << " " << static_cast<uint32_t>(g[e[1]]) << " "
<< get_2bit_value(g, e[2]) << " " << endl; << static_cast<uint32_t>(g[e[2]]) << " " << endl;
*/ */
} }
g_ = g; g_.swap(g);
} }
void MPHIndex::Ranking() { void MPHIndex::Ranking() {
@ -194,19 +187,17 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
uint32_t beg_idx_v = index << b_; uint32_t beg_idx_v = index << b_;
uint32_t beg_idx_b = beg_idx_v >> 2; uint32_t beg_idx_b = beg_idx_v >> 2;
uint32_t end_idx_b = vertex >> 2; uint32_t end_idx_b = vertex >> 2;
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_[beg_idx_b++]]; while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]];
beg_idx_v = beg_idx_b << 2; beg_idx_v = beg_idx_b << 2;
// cerr << "beg_idx_v: " << beg_idx_v << endl; // cerr << "beg_idx_v: " << beg_idx_v << endl;
// cerr << "base rank: " << base_rank << endl; // cerr << "base rank: " << base_rank << endl;
/*
cerr << "G: "; cerr << "G: ";
for (unsigned int i = 0; i < n_; ++i) { for (unsigned int i = 0; i < n_; ++i) {
cerr << get_2bit_value(g_, i) << " "; cerr << static_cast<uint32_t>(g_[i]) << " ";
} }
cerr << endl; cerr << endl;
*/
while (beg_idx_v < vertex) { while (beg_idx_v < vertex) {
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank; if (g_[beg_idx_v] != kUnassigned) ++base_rank;
++beg_idx_v; ++beg_idx_v;
} }
// cerr << "Base rank: " << base_rank << endl; // cerr << "Base rank: " << base_rank << endl;
@ -214,21 +205,12 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
} }
uint32_t MPHIndex::serialize_bytes_needed() const { uint32_t MPHIndex::serialize_bytes_needed() const {
return sizeof(MPHIndex) + g_size_ + ranktable_size_*sizeof(uint32_t); return 0;
} }
void MPHIndex::serialize(char* memory) const { void MPHIndex::serialize(char* memory) const {
memcpy(memory, this, sizeof(MPHIndex));
memcpy(memory + sizeof(MPHIndex), g_, g_size_);
memcpy(memory + sizeof(MPHIndex) + g_size_,
ranktable_, ranktable_size_*sizeof(uint32_t));
} }
bool MPHIndex::deserialize(const char* serialized_memory) { bool MPHIndex::deserialize(const char* serialized_memory) {
memcpy(this, serialized_memory, sizeof(MPHIndex));
g_ = reinterpret_cast<const uint8_t*>(serialized_memory + sizeof(MPHIndex));
ranktable_ = reinterpret_cast<const uint32_t*>(
serialized_memory + sizeof(MPHIndex) + g_size_);
deserialized_ = true;
return true; return true;
} }

View File

@ -45,7 +45,6 @@ class MPHIndex {
public: public:
MPHIndex(double c = 1.23, uint8_t b = 7) : MPHIndex(double c = 1.23, uint8_t b = 7) :
c_(c), b_(b), m_(0), n_(0), k_(0), r_(1), c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
g_(NULL), g_size_(0),
ranktable_(NULL), ranktable_size_(0), ranktable_(NULL), ranktable_size_(0),
deserialized_(false) { } deserialized_(false) { }
~MPHIndex(); ~MPHIndex();
@ -103,10 +102,8 @@ class MPHIndex {
uint32_t r_; uint32_t r_;
uint32_t nest_displacement_[3]; // derived from r_ uint32_t nest_displacement_[3]; // derived from r_
// The array containing the minimal perfect hash function graph. Do not use // The array containing the minimal perfect hash function graph.
// c++ vector to make mmap based backing easier. dynamic_2bitset g_;
const uint8_t* g_;
uint32_t g_size_;
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
// The table used for the rank step of the minimal perfect hash function // The table used for the rank step of the minimal perfect hash function
const uint32_t* ranktable_; const uint32_t* ranktable_;
@ -156,6 +153,7 @@ bool MPHIndex::Reset(
} }
if (iterations == 0) return false; if (iterations == 0) return false;
Assigning(edges, queue); Assigning(edges, queue);
fprintf(stderr, "Assignment finished\n");
std::vector<TriGraph::Edge>().swap(edges); std::vector<TriGraph::Edge>().swap(edges);
Ranking(); Ranking();
deserialized_ = false; deserialized_ = false;
@ -192,7 +190,7 @@ void MPHIndex::hash_vector(const Key& key, uint32_t* h) const {
template <class SeededHashFcn, class Key> template <class SeededHashFcn, class Key>
uint32_t MPHIndex::perfect_hash(const Key& key) const { uint32_t MPHIndex::perfect_hash(const Key& key) const {
uint32_t h[4]; uint32_t h[4];
if (!g_size_) return 0; if (!g_.size()) return 0;
SeededHashFcn().hash64(key, hash_seed_[0], h); SeededHashFcn().hash64(key, hash_seed_[0], h);
h[0] = (h[0] % r_) + nest_displacement_[0]; h[0] = (h[0] % r_) + nest_displacement_[0];
h[1] = (h[1] % r_) + nest_displacement_[1]; h[1] = (h[1] % r_) + nest_displacement_[1];
@ -200,12 +198,11 @@ uint32_t MPHIndex::perfect_hash(const Key& key) const {
// h[0] = (h[0] & (r_-1)) + nest_displacement_[0]; // h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
// h[1] = (h[1] & (r_-1)) + nest_displacement_[1]; // h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
// h[2] = (h[2] & (r_-1)) + nest_displacement_[2]; // h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
// cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl; assert((h[0]) < g_.size());
assert((h[0] >> 2) <g_size_); assert((h[1]) < g_.size());
assert((h[1] >> 2) <g_size_); assert((h[2]) < g_.size());
assert((h[2] >> 2) <g_size_);
uint8_t nest = threebit_mod3[ uint8_t nest = threebit_mod3[
get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])]; g_[h[0]] + g_[h[1]] + g_[h[2]]];
uint32_t vertex = h[nest]; uint32_t vertex = h[nest];
return vertex; return vertex;
} }

View File

@ -33,8 +33,10 @@ int main(int argc, char** argv) {
cerr << endl; cerr << endl;
sort(ids.begin(), ids.end()); sort(ids.begin(), ids.end());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i)); for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
/*
char* serialized = new char[mph_index.serialize_bytes_needed()]; char* serialized = new char[mph_index.serialize_bytes_needed()];
mph_index.serialize(serialized); mph_index.serialize(serialized);
SimpleMPHIndex<string> other_mph_index; SimpleMPHIndex<string> other_mph_index;
other_mph_index.deserialize(serialized); other_mph_index.deserialize(serialized);
*/
} }