diff --git a/cxxmph/mph_bits.h b/cxxmph/mph_bits.h index 7dcf0be..06b2946 100644 --- a/cxxmph/mph_bits.h +++ b/cxxmph/mph_bits.h @@ -4,6 +4,7 @@ #include // for uint32_t and friends #include #include +#include #include #include #include @@ -13,47 +14,38 @@ namespace cxxmph { class dynamic_2bitset { public: - dynamic_2bitset() : data_(NULL), size_(0), one_initialized_(false) {} - dynamic_2bitset(uint32_t size, bool one_initialized = false) - : data_(NULL), size_(0), one_initialized_(one_initialized) { - resize(size); + dynamic_2bitset() : fill_(false) {} + dynamic_2bitset(uint32_t size, bool fill = false) + : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) { } - ~dynamic_2bitset() { delete [] data_; } const uint8_t operator[](uint32_t i) const { return get(i); } uint8_t get(uint32_t i) const { return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3); } uint8_t set(uint32_t i, uint8_t v) { - uint8_t sf = ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); - fprintf(stderr, "v %d sf %d\n", v, sf); + data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3]; data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); + assert(v <= 3); assert(get(i) == v); } void resize(uint32_t size) { - uint8_t* new_data = new uint8_t[size << 2]; - assert(one_initialized_); - assert(one_initialized_ * ones() == ones()); - memset(new_data, one_initialized_*ones(), size << 2); - assert(new_data[0] == ones()); - uint8_t* old_data_ = data_; - for (int i = 0; i < size_; ++i) { - data_ = old_data_; - auto v = get(i); - data_ = new_data; - set(i, v); - } size_ = size; - delete [] old_data_; - data_ = new_data; - assert(data_[0] == ones()); - assert(get(0) == 3); + data_.resize(size >> 2, fill_*ones()); } + void swap(dynamic_2bitset& other) { + std::swap(other.size_, size_); + std::swap(other.fill_, fill_); + std::swap(other.data_, data_); + } + void clear() { data_.clear(); } + + uint32_t size() const { return size_; } static const uint8_t vmask[]; private: - uint8_t* data_; uint32_t size_; - bool one_initialized_; + bool fill_; + std::vector data_; uint8_t ones() { return std::numeric_limits::max(); } }; diff --git a/cxxmph/mph_bits_test.cc b/cxxmph/mph_bits_test.cc index c828f56..e6a764d 100644 --- a/cxxmph/mph_bits_test.cc +++ b/cxxmph/mph_bits_test.cc @@ -5,6 +5,15 @@ using cxxmph::dynamic_2bitset; int main(int argc, char** argv) { + dynamic_2bitset small(256, true); + for (int i = 0; i < small.size(); ++i) small.set(i, i % 4); + for (int i = 0; i < small.size(); ++i) { + if (small[i] != i % 4) { + fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4); + exit(-1); + } + } + int size = 256; dynamic_2bitset bits(size, true /* fill with ones */); for (int i = 0; i < size; ++i) { @@ -27,6 +36,14 @@ int main(int argc, char** argv) { exit(-1); } } + dynamic_2bitset size_corner1(1); + if (size_corner1.size() != 1) exit(-1); + dynamic_2bitset size_corner2(2); + if (size_corner2.size() != 2) exit(-1); + (dynamic_2bitset(4)).swap(size_corner2); + if (size_corner2.size() != 4) exit(-1); + + } diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index caddf12..a291986 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -17,6 +17,7 @@ #include #include // for std::pair +#include "mph_bits.h" #include "mph_index.h" #include "hollow_iterator.h" @@ -107,29 +108,34 @@ class mph_map { static const uint8_t kNestCollision = 3; // biggest 2 bit value void set_nest_value(const uint32_t* h, uint8_t value) { auto index = get_nest_index(h); - assert(get_nest_index(h) < nests_.size() * 4); + assert(get_nest_index(h) < nests_.size()); assert(get_nest_index(h) >> 2 < nests_.size()); assert(value < 4); - set_2bit_value(&nests_[0], index, value); - assert(get_2bit_value(&nests_[0], index) == value); + nests_.set(index, value); + assert(nests_[index] == value); } uint32_t get_nest_value(const uint32_t* h) const { - assert(get_nest_index(h) < nests_.size() * 4); - return get_2bit_value(&(nests_[0]), get_nest_index(h)); + assert(get_nest_index(h) < nests_.size()); + return nests_[get_nest_index(h)]; } uint32_t get_nest_index(const uint32_t* h) const { - return h[3] & ((nests_.size() << 2) - 1); + assert(nests_.size()); + return h[3] % nests_.size(); // a mod 2^n == a & 2^n - 1 + // return h[3] & (nests_.size() - 1); // a mod 2^n == a & 2^n - 1 } void pack(); std::vector values_; std::vector present_; - std::vector nests_; + dynamic_2bitset nests_; SimpleMPHIndex::hash_function> index_; // TODO(davi) optimize slack to hold 128 unique bits from hash64 as key typedef unordered_map slack_type; slack_type slack_; size_type size_; + + mutable uint64_t fast_; + mutable uint64_t slow_; }; MPH_MAP_TMPL_SPEC @@ -143,6 +149,7 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) { } MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() { + fprintf(stderr, "Fast: %d Slow %d ratio %f\n", fast_, slow_, fast_*1.0/slow_); } MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) { @@ -176,11 +183,9 @@ MPH_MAP_METHOD_DECL(void_type, pack)() { new_values.reserve(new_values.size() * 2); std::vector new_present(index_.perfect_hash_size(), false); new_present.reserve(new_present.size() * 2); - auto new_nests_size = nextpoweroftwo(ceil(new_values.size() / 4.0) + 1)*10; - std::vector new_nests(new_nests_size, std::numeric_limits::max()); - new_nests.reserve(new_nests.size() * 2); - nests_.swap(new_nests); - vector used_nests(nests_.size() * 4); + auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10 + 1); + dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_); + vector used_nests(nests_.size()); uint32_t collisions = 0; for (iterator it = begin(), it_end = end(); it != it_end; ++it) { size_type id = index_.perfect_hash(it->first); @@ -194,6 +199,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() { if (used_nests[get_nest_index(h)]) { set_nest_value(h, kNestCollision); assert(get_nest_value(h) == kNestCollision); + // fprintf(stderr, "Collision at nest index %d among %d positions\n", get_nest_index(h), nests_.size()); ++collisions; } else { set_nest_value(h, index_.cuckoo_nest(h)); @@ -207,7 +213,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() { index_.hash_vector(it->first, h); assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h))); } - fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size()); + // fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size()); values_.swap(new_values); present_.swap(new_present); slack_type().swap(slack_); @@ -225,8 +231,7 @@ MPH_MAP_METHOD_DECL(void_type, clear)() { present_.clear(); slack_.clear(); index_.clear(); - nests_.clear(); - nests_.push_back(std::numeric_limits::max()); + dynamic_2bitset(1, true /* fill with 1s */).swap(nests_); size_ = 0; } @@ -245,19 +250,19 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) { } MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const { - return slow_find(k, index_.perfect_hash(k)); - /* uint32_t h[4]; index_.hash_vector(k, h); auto nest = get_nest_value(h); if (__builtin_expect(nest != kNestCollision, 1)) { auto vit = values_.begin() + index_.cuckoo_hash(h, nest); - if (equal_(k, vit->first)) return make_iterator(vit); + if (equal_(k, vit->first)) { + ++fast_; + return make_iterator(vit); + } } nest = index_.cuckoo_nest(h); - assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest)); + ++slow_; return slow_find(k, index_.cuckoo_hash(h, nest)); - */ } MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const { @@ -275,21 +280,18 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe } MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) { - // return slow_find(k, index_.perfect_hash(k)); uint32_t h[4]; index_.hash_vector(k, h); auto nest = get_nest_value(h); if (__builtin_expect(nest != kNestCollision, 1)) { auto vit = values_.begin() + index_.cuckoo_hash(h, nest); - assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest)); if (equal_(k, vit->first)) { - fprintf(stderr, "fast\n"); - return make_iterator(vit); + ++fast_; + return make_iterator(vit); } } nest = index_.cuckoo_nest(h); - fprintf(stderr, "slow\n"); - // assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest)); + ++slow_; return slow_find(k, index_.cuckoo_hash(h, nest)); }