Add a swap function.
This commit is contained in:
parent
69f81ca7ba
commit
776ae2cbca
@ -43,10 +43,10 @@ MPHIndex::~MPHIndex() {
|
||||
}
|
||||
|
||||
// Drops the contents of the rank table and of the g_ bit array.
//
// Each member is swapped with a freshly default-constructed local, so the
// previous contents are destroyed together with the locals when the function
// returns. ranktable_ is a std::vector<uint32_t>; the former raw-pointer
// teardown (delete[] / NULL / ranktable_size_) no longer applies to it.
void MPHIndex::clear() {
  std::vector<uint32_t> empty_ranktable;
  ranktable_.swap(empty_ranktable);
  dynamic_2bitset empty_g;
  g_.swap(empty_g);
}
|
||||
|
||||
bool MPHIndex::GenerateQueue(
|
||||
@ -159,17 +159,14 @@ void MPHIndex::Assigning(
|
||||
void MPHIndex::Ranking() {
|
||||
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
|
||||
uint32_t size = k_ >> 2U;
|
||||
ranktable_size_ = static_cast<uint32_t>(
|
||||
uint32_t ranktable_size = static_cast<uint32_t>(
|
||||
ceil(n_ / static_cast<double>(k_)));
|
||||
delete [] ranktable_;
|
||||
ranktable_ = NULL;
|
||||
uint32_t* ranktable = new uint32_t[ranktable_size_];
|
||||
memset(ranktable, 0, ranktable_size_*sizeof(uint32_t));
|
||||
vector<uint32_t> ranktable(ranktable_size);
|
||||
uint32_t offset = 0;
|
||||
uint32_t count = 0;
|
||||
uint32_t i = 1;
|
||||
while (1) {
|
||||
if (i == ranktable_size_) break;
|
||||
if (i == ranktable.size()) break;
|
||||
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
|
||||
for (uint32_t j = 0; j < nbytes; ++j) {
|
||||
count += kBdzLookupIndex[g_.data()[offset + j]];
|
||||
@ -179,11 +176,11 @@ void MPHIndex::Ranking() {
|
||||
nbytes_total -= size;
|
||||
++i;
|
||||
}
|
||||
ranktable_ = ranktable;
|
||||
ranktable_.swap(ranktable);
|
||||
}
|
||||
|
||||
uint32_t MPHIndex::Rank(uint32_t vertex) const {
|
||||
if (!ranktable_size_) return 0;
|
||||
if (ranktable_.empty()) return 0;
|
||||
uint32_t index = vertex >> b_;
|
||||
uint32_t base_rank = ranktable_[index];
|
||||
uint32_t beg_idx_v = index << b_;
|
||||
@ -211,4 +208,22 @@ uint32_t MPHIndex::Rank(uint32_t vertex) const {
|
||||
return base_rank;
|
||||
}
|
||||
|
||||
// Exchanges the index state with the caller-supplied buffers.
//
// Experimental serialization building block: after the call the arguments
// hold the values the index held before, and the index holds the values the
// arguments held. params is always resized to 12 entries first, so passing an
// empty vector loads zeros into every scalar field listed below.
//
// Layout of params:
//   [0]     c_ multiplied by 1e6 and rounded to the nearest integer
//   [1]     m_
//   [2]     n_
//   [3]     k_
//   [4]     square_ (stored as a uint32_t)
//   [5..7]  hash_seed_[0..2]
//   [8..11] neither read nor written by this function
// g is exchanged with g_ and ranktable with ranktable_.
//
// Because c_ travels through a fixed-point integer, it is quantized to a
// multiple of 1e-6 by every call.
//
// NOTE(review): b_ (read by Rank()) and r_ (read by Mapping()) are not part
// of params and are left untouched here - confirm whether state loaded
// through this function needs them restored separately.
void MPHIndex::swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable) {
  params.resize(12);

  // Round to nearest: a plain double -> uint32_t conversion truncates, which
  // can store a value one unit too small when c_ * 1e6 lands just below an
  // integer because of floating point representation error.
  uint32_t rounded_c = static_cast<uint32_t>(c_ * 1000 * 1000 + 0.5);
  std::swap(params[0], rounded_c);
  c_ = static_cast<double>(rounded_c) / 1000 / 1000;

  std::swap(params[1], m_);
  std::swap(params[2], n_);
  std::swap(params[3], k_);

  // square_ is not a uint32_t, so it is exchanged through a temporary.
  uint32_t uint32_square = static_cast<uint32_t>(square_);
  std::swap(params[4], uint32_square);
  square_ = uint32_square;

  std::swap(params[5], hash_seed_[0]);
  std::swap(params[6], hash_seed_[1]);
  std::swap(params[7], hash_seed_[2]);

  g.swap(g_);
  ranktable.swap(ranktable_);
}
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -45,8 +45,7 @@ namespace cxxmph {
|
||||
class MPHIndex {
|
||||
public:
|
||||
MPHIndex(bool square = false, double c = 1.23, uint8_t b = 7) :
|
||||
c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true),
|
||||
ranktable_(NULL), ranktable_size_(0) {
|
||||
c_(c), b_(b), m_(0), n_(0), k_(0), square_(square), r_(1), g_(8, true) {
|
||||
nest_displacement_[0] = 0;
|
||||
nest_displacement_[1] = r_;
|
||||
nest_displacement_[2] = (r_ << 1);
|
||||
@ -63,7 +62,7 @@ class MPHIndex {
|
||||
void clear();
|
||||
|
||||
// Advanced users functions. Please avoid unless you know what you are doing.
|
||||
uint32_t perfect_hash_size() const { return n_; }
|
||||
uint32_t perfect_hash_size() const { return n_; }
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t perfect_hash(const Key& x) const; // way faster than the minimal
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
@ -72,6 +71,11 @@ class MPHIndex {
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t minimal_perfect_hash(const Key& x) const;
|
||||
|
||||
// Experimental api to use as a serialization building block.
|
||||
// Since this signature exposes some implementation details, expect it to
|
||||
// change.
|
||||
void swap(std::vector<uint32_t>& params, dynamic_2bitset& g, std::vector<uint32_t>& ranktable);
|
||||
|
||||
private:
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
||||
@ -85,7 +89,7 @@ class MPHIndex {
|
||||
|
||||
// Algorithm parameters
|
||||
// Perfect hash function density. If this was a 2graph,
|
||||
// then probability of having an acyclic graph would be
|
||||
// then probability of having an acyclic graph would be
|
||||
// sqrt(1-(2/c)^2). See section 3 for details.
|
||||
// http://www.it-c.dk/people/pagh/papers/simpleperf.pdf
|
||||
double c_;
|
||||
@ -107,8 +111,7 @@ class MPHIndex {
|
||||
dynamic_2bitset g_;
|
||||
uint8_t threebit_mod3[10]; // speed up mod3 calculation for 3bit ints
|
||||
// The table used for the rank step of the minimal perfect hash function
|
||||
const uint32_t* ranktable_;
|
||||
uint32_t ranktable_size_;
|
||||
std::vector<uint32_t> ranktable_;
|
||||
// The selected hash seed triplet for finding the edges in the minimal
|
||||
// perfect hash function graph.
|
||||
uint32_t hash_seed_[3];
|
||||
@ -142,7 +145,7 @@ bool MPHIndex::Reset(
|
||||
std::vector<TriGraph::Edge> edges;
|
||||
std::vector<uint32_t> queue;
|
||||
while (1) {
|
||||
// cerr << "Iterations missing: " << iterations << endl;
|
||||
cerr << "Iterations missing: " << iterations << endl;
|
||||
for (int i = 0; i < 3; ++i) hash_seed_[i] = random();
|
||||
if (Mapping<SeededHashFcn>(begin, end, &edges, &queue)) break;
|
||||
else --iterations;
|
||||
@ -160,7 +163,7 @@ bool MPHIndex::Mapping(
|
||||
ForwardIterator begin, ForwardIterator end,
|
||||
std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
|
||||
TriGraph graph(n_, m_);
|
||||
for (ForwardIterator it = begin; it != end; ++it) {
|
||||
for (ForwardIterator it = begin; it != end; ++it) {
|
||||
h128 h = SeededHashFcn().hash128(*it, hash_seed_[0]);
|
||||
// for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
|
||||
uint32_t v0 = h[0] % r_;
|
||||
|
@ -34,6 +34,16 @@ int main(int argc, char** argv) {
|
||||
sort(ids.begin(), ids.end());
|
||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||
|
||||
// Test serialization
|
||||
vector<uint32_t> params;
|
||||
dynamic_2bitset g;
|
||||
vector<uint32_t> ranktable;
|
||||
mph_index.swap(params, g, ranktable);
|
||||
assert(mph_index.size() == 0);
|
||||
mph_index.swap(params, g, ranktable);
|
||||
assert(mph_index.size() == ids.size());
|
||||
for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
|
||||
|
||||
FlexibleMPHIndex<false, true, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> square_empty;
|
||||
auto id = square_empty.index(1);
|
||||
FlexibleMPHIndex<false, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> unordered_empty;
|
||||
|
Loading…
Reference in New Issue
Block a user