New bit code works; need to clean up logging.

This commit is contained in:
Davi Reis 2012-03-19 03:10:42 -03:00
parent 50ac0e2974
commit b3842c69e8
5 changed files with 52 additions and 53 deletions

View File

@ -17,16 +17,23 @@ namespace cxxmph {
class dynamic_2bitset {
public:
dynamic_2bitset() : fill_(false) {}
dynamic_2bitset() : size_(0), fill_(false) {}
dynamic_2bitset(uint32_t size, bool fill = false)
: size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {
if (data_.size()) fprintf(stderr, "creating %p size %d\n", &data_[0], data_.size());
}
~dynamic_2bitset() {
if (data_.size()) fprintf(stderr, "Deleting %p size %d\n", &data_[0], data_.size());
}
const uint8_t operator[](uint32_t i) const { return get(i); }
uint8_t get(uint32_t i) const {
const uint8_t get(uint32_t i) const {
assert(i < size());
assert((i >> 2) < data_.size());
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
}
uint8_t set(uint32_t i, uint8_t v) {
assert((i >> 2) < data_.size());
data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
assert(v <= 3);
@ -39,17 +46,18 @@ class dynamic_2bitset {
void swap(dynamic_2bitset& other) {
std::swap(other.size_, size_);
std::swap(other.fill_, fill_);
std::swap(other.data_, data_);
other.data_.swap(data_);
}
void clear() { data_.clear(); }
void clear() { data_.clear(); size_ = 0; }
uint32_t size() const { return size_; }
static const uint8_t vmask[];
private:
const std::vector<uint8_t>& data() const { return data_; }
// private:
uint32_t size_;
bool fill_;
std::vector<uint8_t> data_;
uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
const uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
};
static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) {
@ -67,6 +75,8 @@ static uint32_t nextpoweroftwo(uint32_t k) {
// Interesting bit tricks that might end up here:
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
// Fast a % (k*2^t)
// http://www.azillionmonkeys.com/qed/adiv.html
} // namespace cxxmph

View File

@ -40,10 +40,18 @@ int main(int argc, char** argv) {
if (size_corner1.size() != 1) exit(-1);
dynamic_2bitset size_corner2(2);
if (size_corner2.size() != 2) exit(-1);
(dynamic_2bitset(4)).swap(size_corner2);
(dynamic_2bitset(4, true)).swap(size_corner2);
if (size_corner2.size() != 4) exit(-1);
for (int i = 0; i < size_corner2.size(); ++i) {
if (size_corner2[i] != 3) exit(-1);
}
size_corner2.clear();
if (size_corner2.size() != 0) exit(-1);
dynamic_2bitset empty;
empty.clear();
dynamic_2bitset large(1000, true);
empty.swap(large);
}

View File

@ -44,9 +44,6 @@ MPHIndex::~MPHIndex() {
}
void MPHIndex::clear() {
if (!deserialized_) delete [] g_;
g_ = NULL;
g_size_ = 0;
if (!deserialized_) delete [] ranktable_;
ranktable_ = NULL;
ranktable_size_ = 0;
@ -113,13 +110,9 @@ void MPHIndex::Assigning(
const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
uint32_t current_edge = 0;
vector<bool> marked_vertices(n_ + 1);
dynamic_2bitset().swap(g_);
// Initialize vector of half nibbles with all bits set.
g_size_ = static_cast<uint32_t>(ceil(n_/4.0));
if (!deserialized_) delete [] g_;
g_ = NULL;
uint8_t* g = new uint8_t[g_size_];
memset(g, std::numeric_limits<uint8_t>::max(), g_size_);
assert(g[g_size_ - 1] == 255);
dynamic_2bitset g(n_, true /* set bits to 1 */);
uint32_t nedges = m_; // for legibility
for (int i = nedges - 1; i + 1 >= 1; --i) {
@ -133,35 +126,35 @@ void MPHIndex::Assigning(
*/
if (!marked_vertices[e[0]]) {
if (!marked_vertices[e[1]]) {
set_2bit_value(g, e[1], kUnassigned);
g.set(e[1], kUnassigned);
marked_vertices[e[1]] = true;
}
if (!marked_vertices[e[2]]) {
set_2bit_value(g, e[2], kUnassigned);
g.set(e[2], kUnassigned);
assert(marked_vertices.size() > e[2]);
marked_vertices[e[2]] = true;
}
set_2bit_value(g, e[0], (6 - (get_2bit_value(g, e[1]) + get_2bit_value(g, e[2]))) % 3);
g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3);
marked_vertices[e[0]] = true;
} else if (!marked_vertices[e[1]]) {
if (!marked_vertices[e[2]]) {
set_2bit_value(g, e[2], kUnassigned);
g.set(e[2], kUnassigned);
marked_vertices[e[2]] = true;
}
set_2bit_value(g, e[1], (7 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[2]))) % 3);
g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3);
marked_vertices[e[1]] = true;
} else {
set_2bit_value(g, e[2], (8 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[1]))) % 3);
g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3);
marked_vertices[e[2]] = true;
}
/*
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
<< get_2bit_value(g, e[0]) << " "
<< get_2bit_value(g, e[1]) << " "
<< get_2bit_value(g, e[2]) << " " << endl;
<< static_cast<uint32_t>(g[e[0]]) << " "
<< static_cast<uint32_t>(g[e[1]]) << " "
<< static_cast<uint32_t>(g[e[2]]) << " " << endl;
*/
}
g_ = g;
g_.swap(g);
}
void MPHIndex::Ranking() {
@ -194,19 +187,17 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
uint32_t beg_idx_v = index << b_;
uint32_t beg_idx_b = beg_idx_v >> 2;
uint32_t end_idx_b = vertex >> 2;
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_[beg_idx_b++]];
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]];
beg_idx_v = beg_idx_b << 2;
// cerr << "beg_idx_v: " << beg_idx_v << endl;
// cerr << "base rank: " << base_rank << endl;
/*
cerr << "G: ";
for (unsigned int i = 0; i < n_; ++i) {
cerr << get_2bit_value(g_, i) << " ";
cerr << static_cast<uint32_t>(g_[i]) << " ";
}
cerr << endl;
*/
while (beg_idx_v < vertex) {
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank;
if (g_[beg_idx_v] != kUnassigned) ++base_rank;
++beg_idx_v;
}
// cerr << "Base rank: " << base_rank << endl;
@ -214,21 +205,12 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
}
uint32_t MPHIndex::serialize_bytes_needed() const {
return sizeof(MPHIndex) + g_size_ + ranktable_size_*sizeof(uint32_t);
return 0;
}
void MPHIndex::serialize(char* memory) const {
memcpy(memory, this, sizeof(MPHIndex));
memcpy(memory + sizeof(MPHIndex), g_, g_size_);
memcpy(memory + sizeof(MPHIndex) + g_size_,
ranktable_, ranktable_size_*sizeof(uint32_t));
}
bool MPHIndex::deserialize(const char* serialized_memory) {
memcpy(this, serialized_memory, sizeof(MPHIndex));
g_ = reinterpret_cast<const uint8_t*>(serialized_memory + sizeof(MPHIndex));
ranktable_ = reinterpret_cast<const uint32_t*>(
serialized_memory + sizeof(MPHIndex) + g_size_);
deserialized_ = true;
return true;
}

View File

@ -45,7 +45,6 @@ class MPHIndex {
public:
MPHIndex(double c = 1.23, uint8_t b = 7) :
c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
g_(NULL), g_size_(0),
ranktable_(NULL), ranktable_size_(0),
deserialized_(false) { }
~MPHIndex();
@ -103,10 +102,8 @@ class MPHIndex {
uint32_t r_;
uint32_t nest_displacement_[3]; // derived from r_
// The array containing the minimal perfect hash function graph. Do not use
// c++ vector to make mmap based backing easier.
const uint8_t* g_;
uint32_t g_size_;
// The array containing the minimal perfect hash function graph.
dynamic_2bitset g_;
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
// The table used for the rank step of the minimal perfect hash function
const uint32_t* ranktable_;
@ -156,6 +153,7 @@ bool MPHIndex::Reset(
}
if (iterations == 0) return false;
Assigning(edges, queue);
fprintf(stderr, "Assignment finished\n");
std::vector<TriGraph::Edge>().swap(edges);
Ranking();
deserialized_ = false;
@ -192,7 +190,7 @@ void MPHIndex::hash_vector(const Key& key, uint32_t* h) const {
template <class SeededHashFcn, class Key>
uint32_t MPHIndex::perfect_hash(const Key& key) const {
uint32_t h[4];
if (!g_size_) return 0;
if (!g_.size()) return 0;
SeededHashFcn().hash64(key, hash_seed_[0], h);
h[0] = (h[0] % r_) + nest_displacement_[0];
h[1] = (h[1] % r_) + nest_displacement_[1];
@ -200,12 +198,11 @@ uint32_t MPHIndex::perfect_hash(const Key& key) const {
// h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
// h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
// h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
// cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl;
assert((h[0] >> 2) <g_size_);
assert((h[1] >> 2) <g_size_);
assert((h[2] >> 2) <g_size_);
assert((h[0]) < g_.size());
assert((h[1]) < g_.size());
assert((h[2]) < g_.size());
uint8_t nest = threebit_mod3[
get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])];
g_[h[0]] + g_[h[1]] + g_[h[2]]];
uint32_t vertex = h[nest];
return vertex;
}

View File

@ -33,8 +33,10 @@ int main(int argc, char** argv) {
cerr << endl;
sort(ids.begin(), ids.end());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
/*
char* serialized = new char[mph_index.serialize_bytes_needed()];
mph_index.serialize(serialized);
SimpleMPHIndex<string> other_mph_index;
other_mph_index.deserialize(serialized);
*/
}