1
Fork 0

Add a swap function.

main
Davi de Castro Reis 2016-02-24 14:01:18 -05:00
parent 69f81ca7ba
commit 776ae2cbca
3 changed files with 48 additions and 20 deletions

View File

@ -43,10 +43,10 @@ MPHIndex::~MPHIndex() {
}
// Empties the tables held by the index: ranktable_ and g_ are each swapped
// with a default-constructed local, which is destroyed when this function
// returns.
// NOTE(review): this listing is a rendered diff without +/- markers, so the
// three raw-array statements directly below appear to be the pre-change
// cleanup that the vector swap further down replaces -- confirm before
// treating them as live code.
void MPHIndex::clear() {
delete [] ranktable_;
ranktable_ = NULL;
ranktable_size_ = 0;
// TODO(davi) implement me
std::vector<uint32_t> empty_ranktable;
ranktable_.swap(empty_ranktable);
dynamic_2bitset empty_g;
g_.swap(empty_g);
}
bool MPHIndex::GenerateQueue(
@ -159,17 +159,14 @@ void MPHIndex::Assigning(
void MPHIndex::Ranking() {
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
uint32_t size = k_ >> 2U;
ranktable_size_ = static_cast<uint32_t>(
uint32_t ranktable_size = static_cast<uint32_t>(
ceil(n_ / static_cast<double>(k_)));
delete [] ranktable_;
ranktable_ = NULL;
uint32_t* ranktable = new uint32_t[ranktable_size_];
memset(ranktable, 0, ranktable_size_*sizeof(uint32_t));
vector<uint32_t> ranktable(ranktable_size);
uint32_t offset = 0;
uint32_t count = 0;
uint32_t i = 1;
while (1) {
if (i == ranktable_size_) break;
if (i == ranktable.size()) break;
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
for (uint32_t j = 0; j < nbytes; ++j) {
count += kBdzLookupIndex[g_.data()[offset + j]];
@ -179,11 +176,11 @@ void MPHIndex::Ranking() {
nbytes_total -= size;
++i;
}
ranktable_ = ranktable;
ranktable_.swap(ranktable);
}
uint32_t MPHIndex::Rank(uint32_t vertex) const {
if (!ranktable_size_) return 0;
if (ranktable_.empty()) return 0;
uint32_t index = vertex >> b_;
uint32_t base_rank = ranktable_[index];
uint32_t beg_idx_v = index << b_;
@ -211,4 +208,22 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
return base_rank;
}
// Exchanges this index's state with the caller-supplied containers, so the
// same call can be used both to extract the state and to load it back.
//
// params is resized to 12 entries; this function uses the first eight:
//   [0]    c_ scaled by 1,000,000 and rounded to the nearest integer
//   [1]    m_
//   [2]    n_
//   [3]    k_
//   [4]    square_ (stored as 0 or 1)
//   [5..7] hash_seed_[0..2]
// Entries 8..11 are not touched here.
// g and ranktable are swapped wholesale with g_ and ranktable_.
//
// NOTE(review): b_, r_ and nest_displacement_ are not exchanged by this
// function; presumably the receiving index must already agree on them --
// verify against the callers that reload a saved state.
void MPHIndex::swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable) {
  params.resize(12);

  // c_ travels as fixed point with six decimal places. Adding 0.5 before the
  // cast rounds to nearest; a bare cast truncates, so a product that lands
  // just below an integer would lose one unit on every round-trip.
  uint32_t rounded_c = static_cast<uint32_t>(c_ * 1000 * 1000 + 0.5);
  std::swap(params[0], rounded_c);
  c_ = static_cast<double>(rounded_c) / 1000 / 1000;

  std::swap(params[1], m_);
  std::swap(params[2], n_);
  std::swap(params[3], k_);

  // square_ is widened to a uint32_t so it can share the params vector.
  uint32_t uint32_square = static_cast<uint32_t>(square_);
  std::swap(params[4], uint32_square);
  square_ = uint32_square;

  for (uint32_t seed = 0; seed < 3; ++seed) {
    std::swap(params[5 + seed], hash_seed_[seed]);
  }

  g.swap(g_);
  ranktable.swap(ranktable_);
}
} // namespace cxxmph

View File

@ -45,8 +45,7 @@ namespace cxxmph {
class MPHIndex {
public:
MPHIndex(bool square = false, double c = 1.23, uint8_t b = 7) :
c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true),
ranktable_(NULL), ranktable_size_(0) {
c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true) {
nest_displacement_[0] = 0;
nest_displacement_[1] = r_;
nest_displacement_[2] = (r_ << 1);
@ -63,7 +62,7 @@ class MPHIndex {
void clear();
// Advanced users functions. Please avoid unless you know what you are doing.
uint32_t perfect_hash_size() const { return n_; }
uint32_t perfect_hash_size() const { return n_; }
template <class SeededHashFcn, class Key> // must agree with Reset
uint32_t perfect_hash(const Key& x) const; // way faster than the minimal
template <class SeededHashFcn, class Key> // must agree with Reset
@ -72,6 +71,11 @@ class MPHIndex {
template <class SeededHashFcn, class Key> // must agree with Reset
uint32_t minimal_perfect_hash(const Key& x) const;
// Experimental API to use as a serialization building block.
// Since this signature exposes some implementation details, expect it to
// change.
void swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable);
private:
template <class SeededHashFcn, class ForwardIterator>
bool Mapping(ForwardIterator begin, ForwardIterator end,
@ -85,7 +89,7 @@ class MPHIndex {
// Algorithm parameters
// Perfect hash function density. If this was a 2graph,
// then probability of having an acyclic graph would be
// then probability of having an acyclic graph would be
// sqrt(1-(2/c)^2). See section 3 for details.
// http://www.it-c.dk/people/pagh/papers/simpleperf.pdf
double c_;
@ -107,8 +111,7 @@ class MPHIndex {
dynamic_2bitset g_;
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
// The table used for the rank step of the minimal perfect hash function
const uint32_t* ranktable_;
uint32_t ranktable_size_;
std::vector<uint32_t> ranktable_;
// The selected hash seed triplet for finding the edges in the minimal
// perfect hash function graph.
uint32_t hash_seed_[3];
@ -142,7 +145,7 @@ bool MPHIndex::Reset(
std::vector<TriGraph::Edge> edges;
std::vector<uint32_t> queue;
while (1) {
// cerr << "Iterations missing: " << iterations << endl;
cerr << "Iterations missing: " << iterations << endl;
for (int i = 0; i < 3; ++i) hash_seed_[i] = random();
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
else --iterations;
@ -160,7 +163,7 @@ bool MPHIndex::Mapping(
ForwardIterator begin, ForwardIterator end,
std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
TriGraph graph(n_, m_);
for (ForwardIterator it = begin; it != end; ++it) {
for (ForwardIterator it = begin; it != end; ++it) {
h128 h = SeededHashFcn().hash128(*it, hash_seed_[0]);
// for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
uint32_t v0 = h[0] % r_;

View File

@ -34,6 +34,16 @@ int main(int argc, char** argv) {
sort(ids.begin(), ids.end());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
// Test serialization
vector<uint32_t> params;
dynamic_2bitset g;
vector<uint32_t> ranktable;
mph_index.swap(params, g, ranktable);
assert(mph_index.size() == 0);
mph_index.swap(params, g, ranktable);
assert(mph_index.size() == ids.size());
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
FlexibleMPHIndex<false, true, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> square_empty;
auto id = square_empty.index(1);
FlexibleMPHIndex<false, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> unordered_empty;