New bit code works; need to clean up logging.
This commit is contained in:
parent
50ac0e2974
commit
b3842c69e8
@ -17,16 +17,23 @@ namespace cxxmph {
|
||||
|
||||
class dynamic_2bitset {
|
||||
public:
|
||||
dynamic_2bitset() : fill_(false) {}
|
||||
dynamic_2bitset() : size_(0), fill_(false) {}
|
||||
dynamic_2bitset(uint32_t size, bool fill = false)
|
||||
: size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {
|
||||
if (data_.size()) fprintf(stderr, "creating %p size %d\n", &data_[0], data_.size());
|
||||
}
|
||||
~dynamic_2bitset() {
|
||||
if (data_.size()) fprintf(stderr, "Deleting %p size %d\n", &data_[0], data_.size());
|
||||
}
|
||||
|
||||
const uint8_t operator[](uint32_t i) const { return get(i); }
|
||||
uint8_t get(uint32_t i) const {
|
||||
const uint8_t get(uint32_t i) const {
|
||||
assert(i < size());
|
||||
assert((i >> 2) < data_.size());
|
||||
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
|
||||
}
|
||||
uint8_t set(uint32_t i, uint8_t v) {
|
||||
assert((i >> 2) < data_.size());
|
||||
data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
|
||||
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
|
||||
assert(v <= 3);
|
||||
@ -39,17 +46,18 @@ class dynamic_2bitset {
|
||||
void swap(dynamic_2bitset& other) {
|
||||
std::swap(other.size_, size_);
|
||||
std::swap(other.fill_, fill_);
|
||||
std::swap(other.data_, data_);
|
||||
other.data_.swap(data_);
|
||||
}
|
||||
void clear() { data_.clear(); }
|
||||
void clear() { data_.clear(); size_ = 0; }
|
||||
|
||||
uint32_t size() const { return size_; }
|
||||
static const uint8_t vmask[];
|
||||
private:
|
||||
const std::vector<uint8_t>& data() const { return data_; }
|
||||
// private:
|
||||
uint32_t size_;
|
||||
bool fill_;
|
||||
std::vector<uint8_t> data_;
|
||||
uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
|
||||
const uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
|
||||
};
|
||||
|
||||
static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) {
|
||||
@ -67,6 +75,8 @@ static uint32_t nextpoweroftwo(uint32_t k) {
|
||||
|
||||
// Interesting bit tricks that might end up here:
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
|
||||
// Fast a % (k*2^t)
|
||||
// http://www.azillionmonkeys.com/qed/adiv.html
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
|
@ -40,10 +40,18 @@ int main(int argc, char** argv) {
|
||||
if (size_corner1.size() != 1) exit(-1);
|
||||
dynamic_2bitset size_corner2(2);
|
||||
if (size_corner2.size() != 2) exit(-1);
|
||||
(dynamic_2bitset(4)).swap(size_corner2);
|
||||
(dynamic_2bitset(4, true)).swap(size_corner2);
|
||||
if (size_corner2.size() != 4) exit(-1);
|
||||
for (int i = 0; i < size_corner2.size(); ++i) {
|
||||
if (size_corner2[i] != 3) exit(-1);
|
||||
}
|
||||
size_corner2.clear();
|
||||
if (size_corner2.size() != 0) exit(-1);
|
||||
|
||||
|
||||
dynamic_2bitset empty;
|
||||
empty.clear();
|
||||
dynamic_2bitset large(1000, true);
|
||||
empty.swap(large);
|
||||
}
|
||||
|
||||
|
||||
|
@ -44,9 +44,6 @@ MPHIndex::~MPHIndex() {
|
||||
}
|
||||
|
||||
void MPHIndex::clear() {
|
||||
if (!deserialized_) delete [] g_;
|
||||
g_ = NULL;
|
||||
g_size_ = 0;
|
||||
if (!deserialized_) delete [] ranktable_;
|
||||
ranktable_ = NULL;
|
||||
ranktable_size_ = 0;
|
||||
@ -113,13 +110,9 @@ void MPHIndex::Assigning(
|
||||
const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
|
||||
uint32_t current_edge = 0;
|
||||
vector<bool> marked_vertices(n_ + 1);
|
||||
dynamic_2bitset().swap(g_);
|
||||
// Initialize vector of half nibbles with all bits set.
|
||||
g_size_ = static_cast<uint32_t>(ceil(n_/4.0));
|
||||
if (!deserialized_) delete [] g_;
|
||||
g_ = NULL;
|
||||
uint8_t* g = new uint8_t[g_size_];
|
||||
memset(g, std::numeric_limits<uint8_t>::max(), g_size_);
|
||||
assert(g[g_size_ - 1] == 255);
|
||||
dynamic_2bitset g(n_, true /* set bits to 1 */);
|
||||
|
||||
uint32_t nedges = m_; // for legibility
|
||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||
@ -133,35 +126,35 @@ void MPHIndex::Assigning(
|
||||
*/
|
||||
if (!marked_vertices[e[0]]) {
|
||||
if (!marked_vertices[e[1]]) {
|
||||
set_2bit_value(g, e[1], kUnassigned);
|
||||
g.set(e[1], kUnassigned);
|
||||
marked_vertices[e[1]] = true;
|
||||
}
|
||||
if (!marked_vertices[e[2]]) {
|
||||
set_2bit_value(g, e[2], kUnassigned);
|
||||
g.set(e[2], kUnassigned);
|
||||
assert(marked_vertices.size() > e[2]);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
set_2bit_value(g, e[0], (6 - (get_2bit_value(g, e[1]) + get_2bit_value(g, e[2]))) % 3);
|
||||
g.set(e[0], (6 - (g[e[1]] + g[e[2]])) % 3);
|
||||
marked_vertices[e[0]] = true;
|
||||
} else if (!marked_vertices[e[1]]) {
|
||||
if (!marked_vertices[e[2]]) {
|
||||
set_2bit_value(g, e[2], kUnassigned);
|
||||
g.set(e[2], kUnassigned);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
set_2bit_value(g, e[1], (7 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[2]))) % 3);
|
||||
g.set(e[1], (7 - (g[e[0]] + g[e[2]])) % 3);
|
||||
marked_vertices[e[1]] = true;
|
||||
} else {
|
||||
set_2bit_value(g, e[2], (8 - (get_2bit_value(g, e[0]) + get_2bit_value(g, e[1]))) % 3);
|
||||
g.set(e[2], (8 - (g[e[0]] + g[e[1]])) % 3);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
/*
|
||||
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||
<< get_2bit_value(g, e[0]) << " "
|
||||
<< get_2bit_value(g, e[1]) << " "
|
||||
<< get_2bit_value(g, e[2]) << " " << endl;
|
||||
<< static_cast<uint32_t>(g[e[0]]) << " "
|
||||
<< static_cast<uint32_t>(g[e[1]]) << " "
|
||||
<< static_cast<uint32_t>(g[e[2]]) << " " << endl;
|
||||
*/
|
||||
}
|
||||
g_ = g;
|
||||
g_.swap(g);
|
||||
}
|
||||
|
||||
void MPHIndex::Ranking() {
|
||||
@ -194,19 +187,17 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
|
||||
uint32_t beg_idx_v = index << b_;
|
||||
uint32_t beg_idx_b = beg_idx_v >> 2;
|
||||
uint32_t end_idx_b = vertex >> 2;
|
||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_[beg_idx_b++]];
|
||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupIndex[g_.data()[beg_idx_b++]];
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
// cerr << "beg_idx_v: " << beg_idx_v << endl;
|
||||
// cerr << "base rank: " << base_rank << endl;
|
||||
/*
|
||||
cerr << "G: ";
|
||||
for (unsigned int i = 0; i < n_; ++i) {
|
||||
cerr << get_2bit_value(g_, i) << " ";
|
||||
cerr << static_cast<uint32_t>(g_[i]) << " ";
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
while (beg_idx_v < vertex) {
|
||||
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank;
|
||||
if (g_[beg_idx_v] != kUnassigned) ++base_rank;
|
||||
++beg_idx_v;
|
||||
}
|
||||
// cerr << "Base rank: " << base_rank << endl;
|
||||
@ -214,21 +205,12 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
|
||||
}
|
||||
|
||||
uint32_t MPHIndex::serialize_bytes_needed() const {
|
||||
return sizeof(MPHIndex) + g_size_ + ranktable_size_*sizeof(uint32_t);
|
||||
return 0;
|
||||
}
|
||||
void MPHIndex::serialize(char* memory) const {
|
||||
memcpy(memory, this, sizeof(MPHIndex));
|
||||
memcpy(memory + sizeof(MPHIndex), g_, g_size_);
|
||||
memcpy(memory + sizeof(MPHIndex) + g_size_,
|
||||
ranktable_, ranktable_size_*sizeof(uint32_t));
|
||||
}
|
||||
|
||||
bool MPHIndex::deserialize(const char* serialized_memory) {
|
||||
memcpy(this, serialized_memory, sizeof(MPHIndex));
|
||||
g_ = reinterpret_cast<const uint8_t*>(serialized_memory + sizeof(MPHIndex));
|
||||
ranktable_ = reinterpret_cast<const uint32_t*>(
|
||||
serialized_memory + sizeof(MPHIndex) + g_size_);
|
||||
deserialized_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,6 @@ class MPHIndex {
|
||||
public:
|
||||
MPHIndex(double c = 1.23, uint8_t b = 7) :
|
||||
c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
|
||||
g_(NULL), g_size_(0),
|
||||
ranktable_(NULL), ranktable_size_(0),
|
||||
deserialized_(false) { }
|
||||
~MPHIndex();
|
||||
@ -103,10 +102,8 @@ class MPHIndex {
|
||||
uint32_t r_;
|
||||
uint32_t nest_displacement_[3]; // derived from r_
|
||||
|
||||
// The array containing the minimal perfect hash function graph. Do not use
|
||||
// c++ vector to make mmap based backing easier.
|
||||
const uint8_t* g_;
|
||||
uint32_t g_size_;
|
||||
// The array containing the minimal perfect hash function graph.
|
||||
dynamic_2bitset g_;
|
||||
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
|
||||
// The table used for the rank step of the minimal perfect hash function
|
||||
const uint32_t* ranktable_;
|
||||
@ -156,6 +153,7 @@ bool MPHIndex::Reset(
|
||||
}
|
||||
if (iterations == 0) return false;
|
||||
Assigning(edges, queue);
|
||||
fprintf(stderr, "Assignment finished\n");
|
||||
std::vector<TriGraph::Edge>().swap(edges);
|
||||
Ranking();
|
||||
deserialized_ = false;
|
||||
@ -192,7 +190,7 @@ void MPHIndex::hash_vector(const Key& key, uint32_t* h) const {
|
||||
template <class SeededHashFcn, class Key>
|
||||
uint32_t MPHIndex::perfect_hash(const Key& key) const {
|
||||
uint32_t h[4];
|
||||
if (!g_size_) return 0;
|
||||
if (!g_.size()) return 0;
|
||||
SeededHashFcn().hash64(key, hash_seed_[0], h);
|
||||
h[0] = (h[0] % r_) + nest_displacement_[0];
|
||||
h[1] = (h[1] % r_) + nest_displacement_[1];
|
||||
@ -200,12 +198,11 @@ uint32_t MPHIndex::perfect_hash(const Key& key) const {
|
||||
// h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
|
||||
// h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
|
||||
// h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
|
||||
// cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl;
|
||||
assert((h[0] >> 2) <g_size_);
|
||||
assert((h[1] >> 2) <g_size_);
|
||||
assert((h[2] >> 2) <g_size_);
|
||||
assert((h[0]) < g_.size());
|
||||
assert((h[1]) < g_.size());
|
||||
assert((h[2]) < g_.size());
|
||||
uint8_t nest = threebit_mod3[
|
||||
get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])];
|
||||
g_[h[0]] + g_[h[1]] + g_[h[2]]];
|
||||
uint32_t vertex = h[nest];
|
||||
return vertex;
|
||||
}
|
||||
|
@ -33,8 +33,10 @@ int main(int argc, char** argv) {
|
||||
cerr << endl;
|
||||
sort(ids.begin(), ids.end());
|
||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||
/*
|
||||
char* serialized = new char[mph_index.serialize_bytes_needed()];
|
||||
mph_index.serialize(serialized);
|
||||
SimpleMPHIndex<string> other_mph_index;
|
||||
other_mph_index.deserialize(serialized);
|
||||
*/
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user