Added a lookup-table optimization to avoid computing mod 3 in perfect_hash.
This commit is contained in:
parent
2bfe38d2da
commit
11d54ea837
|
@ -2,6 +2,8 @@
|
||||||
#define __CXXMPH_MPH_BITS_H__
|
#define __CXXMPH_MPH_BITS_H__
|
||||||
|
|
||||||
#include <stdint.h> // for uint32_t and friends
|
#include <stdint.h> // for uint32_t and friends
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
@ -9,6 +11,7 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
namespace cxxmph {
|
namespace cxxmph {
|
||||||
|
|
||||||
|
@ -61,6 +64,9 @@ static uint32_t nextpoweroftwo(uint32_t k) {
|
||||||
for (int i=1; i<sizeof(uint32_t)*CHAR_BIT; i<<=1) k = k | k >> i;
|
for (int i=1; i<sizeof(uint32_t)*CHAR_BIT; i<<=1) k = k | k >> i;
|
||||||
return k+1;
|
return k+1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Interesting bit tricks that might end up here:
|
||||||
|
// http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
|
||||||
|
|
||||||
} // namespace cxxmph
|
} // namespace cxxmph
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,8 @@ class MPHIndex {
|
||||||
public:
|
public:
|
||||||
MPHIndex(double c = 1.23, uint8_t b = 7) :
|
MPHIndex(double c = 1.23, uint8_t b = 7) :
|
||||||
c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
|
c_(c), b_(b), m_(0), n_(0), k_(0), r_(1),
|
||||||
g_(NULL), g_size_(0), ranktable_(NULL), ranktable_size_(0),
|
g_(NULL), g_size_(0),
|
||||||
|
ranktable_(NULL), ranktable_size_(0),
|
||||||
deserialized_(false) { }
|
deserialized_(false) { }
|
||||||
~MPHIndex();
|
~MPHIndex();
|
||||||
|
|
||||||
|
@ -112,6 +113,7 @@ class MPHIndex {
|
||||||
// c++ vector to make mmap based backing easier.
|
// c++ vector to make mmap based backing easier.
|
||||||
const uint8_t* g_;
|
const uint8_t* g_;
|
||||||
uint32_t g_size_;
|
uint32_t g_size_;
|
||||||
|
uint8_t threebit_mod3[10]; // lookup table of i % 3 for i in 0..9 (the sum of three 2-bit values); avoids a runtime mod 3
|
||||||
// The table used for the rank step of the minimal perfect hash function
|
// The table used for the rank step of the minimal perfect hash function
|
||||||
const uint32_t* ranktable_;
|
const uint32_t* ranktable_;
|
||||||
uint32_t ranktable_size_;
|
uint32_t ranktable_size_;
|
||||||
|
@ -135,12 +137,13 @@ bool MPHIndex::Reset(
|
||||||
m_ = size;
|
m_ = size;
|
||||||
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
|
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
|
||||||
if ((r_ % 2) == 0) r_ += 1;
|
if ((r_ % 2) == 0) r_ += 1;
|
||||||
nest_displacement_[0] = 0;
|
|
||||||
nest_displacement_[1] = r_;
|
|
||||||
nest_displacement_[2] = (r_ << 1);
|
|
||||||
// This can be used to speed mods, but increases occupation too much.
|
// This can be used to speed mods, but increases occupation too much.
|
||||||
// Needs to try http://gmplib.org/manual/Integer-Exponentiation.html instead
|
// Needs to try http://gmplib.org/manual/Integer-Exponentiation.html instead
|
||||||
// r_ = nextpoweroftwo(r_);
|
// r_ = nextpoweroftwo(r_);
|
||||||
|
nest_displacement_[0] = 0;
|
||||||
|
nest_displacement_[1] = r_;
|
||||||
|
nest_displacement_[2] = (r_ << 1);
|
||||||
|
for (int i = 0; i < sizeof(threebit_mod3); ++i) threebit_mod3[i] = i % 3;
|
||||||
|
|
||||||
n_ = 3*r_;
|
n_ = 3*r_;
|
||||||
k_ = 1U << b_;
|
k_ = 1U << b_;
|
||||||
|
@ -215,15 +218,18 @@ uint32_t MPHIndex::perfect_hash(const Key& key) const {
|
||||||
uint32_t h[4];
|
uint32_t h[4];
|
||||||
if (!g_size_) return 0;
|
if (!g_size_) return 0;
|
||||||
SeededHashFcn().hash64(key, hash_seed_[0], h);
|
SeededHashFcn().hash64(key, hash_seed_[0], h);
|
||||||
// for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
|
|
||||||
h[0] = (h[0] % r_) + nest_displacement_[0];
|
h[0] = (h[0] % r_) + nest_displacement_[0];
|
||||||
h[1] = (h[1] % r_) + nest_displacement_[1];
|
h[1] = (h[1] % r_) + nest_displacement_[1];
|
||||||
h[2] = (h[2] % r_) + nest_displacement_[2];
|
h[2] = (h[2] % r_) + nest_displacement_[2];
|
||||||
|
// h[0] = (h[0] & (r_-1)) + nest_displacement_[0];
|
||||||
|
// h[1] = (h[1] & (r_-1)) + nest_displacement_[1];
|
||||||
|
// h[2] = (h[2] & (r_-1)) + nest_displacement_[2];
|
||||||
// cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl;
|
// cerr << "g_.size() " << g_size_ << " h0 >> 2 " << (h[0] >> 2) << endl;
|
||||||
assert((h[0] >> 2) <g_size_);
|
assert((h[0] >> 2) <g_size_);
|
||||||
assert((h[1] >> 2) <g_size_);
|
assert((h[1] >> 2) <g_size_);
|
||||||
assert((h[2] >> 2) <g_size_);
|
assert((h[2] >> 2) <g_size_);
|
||||||
uint8_t nest = (get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3;
|
uint8_t nest = threebit_mod3[
|
||||||
|
get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])];
|
||||||
uint32_t vertex = h[nest];
|
uint32_t vertex = h[nest];
|
||||||
return vertex;
|
return vertex;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue