Working, but it sucks.

This commit is contained in:
Davi Reis 2012-03-14 18:26:26 -03:00
parent b96b71961d
commit e3ccde3ba0
3 changed files with 62 additions and 51 deletions

View File

@@ -4,6 +4,7 @@
#include <stdint.h> // for uint32_t and friends #include <stdint.h> // for uint32_t and friends
#include <cassert> #include <cassert>
#include <climits> #include <climits>
#include <cmath>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <limits> #include <limits>
@@ -13,47 +14,38 @@ namespace cxxmph {
class dynamic_2bitset { class dynamic_2bitset {
public: public:
dynamic_2bitset() : data_(NULL), size_(0), one_initialized_(false) {} dynamic_2bitset() : fill_(false) {}
dynamic_2bitset(uint32_t size, bool one_initialized = false) dynamic_2bitset(uint32_t size, bool fill = false)
: data_(NULL), size_(0), one_initialized_(one_initialized) { : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {
resize(size);
} }
~dynamic_2bitset() { delete [] data_; }
const uint8_t operator[](uint32_t i) const { return get(i); } const uint8_t operator[](uint32_t i) const { return get(i); }
uint8_t get(uint32_t i) const { uint8_t get(uint32_t i) const {
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3); return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
} }
uint8_t set(uint32_t i, uint8_t v) { uint8_t set(uint32_t i, uint8_t v) {
uint8_t sf = ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
fprintf(stderr, "v %d sf %d\n", v, sf);
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
assert(v <= 3);
assert(get(i) == v); assert(get(i) == v);
} }
void resize(uint32_t size) { void resize(uint32_t size) {
uint8_t* new_data = new uint8_t[size << 2];
assert(one_initialized_);
assert(one_initialized_ * ones() == ones());
memset(new_data, one_initialized_*ones(), size << 2);
assert(new_data[0] == ones());
uint8_t* old_data_ = data_;
for (int i = 0; i < size_; ++i) {
data_ = old_data_;
auto v = get(i);
data_ = new_data;
set(i, v);
}
size_ = size; size_ = size;
delete [] old_data_; data_.resize(size >> 2, fill_*ones());
data_ = new_data;
assert(data_[0] == ones());
assert(get(0) == 3);
} }
void swap(dynamic_2bitset& other) {
std::swap(other.size_, size_);
std::swap(other.fill_, fill_);
std::swap(other.data_, data_);
}
void clear() { data_.clear(); }
uint32_t size() const { return size_; }
static const uint8_t vmask[]; static const uint8_t vmask[];
private: private:
uint8_t* data_;
uint32_t size_; uint32_t size_;
bool one_initialized_; bool fill_;
std::vector<uint8_t> data_;
uint8_t ones() { return std::numeric_limits<uint8_t>::max(); } uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
}; };

View File

@@ -5,6 +5,15 @@
using cxxmph::dynamic_2bitset; using cxxmph::dynamic_2bitset;
int main(int argc, char** argv) { int main(int argc, char** argv) {
dynamic_2bitset small(256, true);
for (int i = 0; i < small.size(); ++i) small.set(i, i % 4);
for (int i = 0; i < small.size(); ++i) {
if (small[i] != i % 4) {
fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4);
exit(-1);
}
}
int size = 256; int size = 256;
dynamic_2bitset bits(size, true /* fill with ones */); dynamic_2bitset bits(size, true /* fill with ones */);
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
@@ -27,6 +36,14 @@ int main(int argc, char** argv) {
exit(-1); exit(-1);
} }
} }
dynamic_2bitset size_corner1(1);
if (size_corner1.size() != 1) exit(-1);
dynamic_2bitset size_corner2(2);
if (size_corner2.size() != 2) exit(-1);
(dynamic_2bitset(4)).swap(size_corner2);
if (size_corner2.size() != 4) exit(-1);
} }

View File

@@ -17,6 +17,7 @@
#include <vector> #include <vector>
#include <utility> // for std::pair #include <utility> // for std::pair
#include "mph_bits.h"
#include "mph_index.h" #include "mph_index.h"
#include "hollow_iterator.h" #include "hollow_iterator.h"
@@ -107,29 +108,34 @@ class mph_map {
static const uint8_t kNestCollision = 3; // biggest 2 bit value static const uint8_t kNestCollision = 3; // biggest 2 bit value
void set_nest_value(const uint32_t* h, uint8_t value) { void set_nest_value(const uint32_t* h, uint8_t value) {
auto index = get_nest_index(h); auto index = get_nest_index(h);
assert(get_nest_index(h) < nests_.size() * 4); assert(get_nest_index(h) < nests_.size());
assert(get_nest_index(h) >> 2 < nests_.size()); assert(get_nest_index(h) >> 2 < nests_.size());
assert(value < 4); assert(value < 4);
set_2bit_value(&nests_[0], index, value); nests_.set(index, value);
assert(get_2bit_value(&nests_[0], index) == value); assert(nests_[index] == value);
} }
uint32_t get_nest_value(const uint32_t* h) const { uint32_t get_nest_value(const uint32_t* h) const {
assert(get_nest_index(h) < nests_.size() * 4); assert(get_nest_index(h) < nests_.size());
return get_2bit_value(&(nests_[0]), get_nest_index(h)); return nests_[get_nest_index(h)];
} }
uint32_t get_nest_index(const uint32_t* h) const { uint32_t get_nest_index(const uint32_t* h) const {
return h[3] & ((nests_.size() << 2) - 1); assert(nests_.size());
return h[3] % nests_.size(); // a mod 2^n == a & 2^n - 1
// return h[3] & (nests_.size() - 1); // a mod 2^n == a & 2^n - 1
} }
void pack(); void pack();
std::vector<value_type> values_; std::vector<value_type> values_;
std::vector<bool> present_; std::vector<bool> present_;
std::vector<uint8_t> nests_; dynamic_2bitset nests_;
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_; SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
// TODO(davi) optimize slack to hold 128 unique bits from hash64 as key // TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type; typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
slack_type slack_; slack_type slack_;
size_type size_; size_type size_;
mutable uint64_t fast_;
mutable uint64_t slow_;
}; };
MPH_MAP_TMPL_SPEC MPH_MAP_TMPL_SPEC
@@ -143,6 +149,7 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
} }
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() { MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
fprintf(stderr, "Fast: %d Slow %d ratio %f\n", fast_, slow_, fast_*1.0/slow_);
} }
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) { MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
@@ -176,11 +183,9 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
new_values.reserve(new_values.size() * 2); new_values.reserve(new_values.size() * 2);
std::vector<bool> new_present(index_.perfect_hash_size(), false); std::vector<bool> new_present(index_.perfect_hash_size(), false);
new_present.reserve(new_present.size() * 2); new_present.reserve(new_present.size() * 2);
auto new_nests_size = nextpoweroftwo(ceil(new_values.size() / 4.0) + 1)*10; auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10 + 1);
std::vector<uint8_t> new_nests(new_nests_size, std::numeric_limits<uint8_t>::max()); dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_);
new_nests.reserve(new_nests.size() * 2); vector<bool> used_nests(nests_.size());
nests_.swap(new_nests);
vector<bool> used_nests(nests_.size() * 4);
uint32_t collisions = 0; uint32_t collisions = 0;
for (iterator it = begin(), it_end = end(); it != it_end; ++it) { for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
size_type id = index_.perfect_hash(it->first); size_type id = index_.perfect_hash(it->first);
@@ -194,6 +199,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
if (used_nests[get_nest_index(h)]) { if (used_nests[get_nest_index(h)]) {
set_nest_value(h, kNestCollision); set_nest_value(h, kNestCollision);
assert(get_nest_value(h) == kNestCollision); assert(get_nest_value(h) == kNestCollision);
// fprintf(stderr, "Collision at nest index %d among %d positions\n", get_nest_index(h), nests_.size());
++collisions; ++collisions;
} else { } else {
set_nest_value(h, index_.cuckoo_nest(h)); set_nest_value(h, index_.cuckoo_nest(h));
@@ -207,7 +213,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
index_.hash_vector(it->first, h); index_.hash_vector(it->first, h);
assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h))); assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h)));
} }
fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size()); // fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
values_.swap(new_values); values_.swap(new_values);
present_.swap(new_present); present_.swap(new_present);
slack_type().swap(slack_); slack_type().swap(slack_);
@@ -225,8 +231,7 @@ MPH_MAP_METHOD_DECL(void_type, clear)() {
present_.clear(); present_.clear();
slack_.clear(); slack_.clear();
index_.clear(); index_.clear();
nests_.clear(); dynamic_2bitset(1, true /* fill with 1s */).swap(nests_);
nests_.push_back(std::numeric_limits<uint8_t>::max());
size_ = 0; size_ = 0;
} }
@@ -245,19 +250,19 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
} }
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const { MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
return slow_find(k, index_.perfect_hash(k));
/*
uint32_t h[4]; uint32_t h[4];
index_.hash_vector(k, h); index_.hash_vector(k, h);
auto nest = get_nest_value(h); auto nest = get_nest_value(h);
if (__builtin_expect(nest != kNestCollision, 1)) { if (__builtin_expect(nest != kNestCollision, 1)) {
auto vit = values_.begin() + index_.cuckoo_hash(h, nest); auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
if (equal_(k, vit->first)) return make_iterator(vit); if (equal_(k, vit->first)) {
++fast_;
return make_iterator(vit);
}
} }
nest = index_.cuckoo_nest(h); nest = index_.cuckoo_nest(h);
assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest)); ++slow_;
return slow_find(k, index_.cuckoo_hash(h, nest)); return slow_find(k, index_.cuckoo_hash(h, nest));
*/
} }
MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const { MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const {
@@ -275,21 +280,18 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe
} }
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) { MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
// return slow_find(k, index_.perfect_hash(k));
uint32_t h[4]; uint32_t h[4];
index_.hash_vector(k, h); index_.hash_vector(k, h);
auto nest = get_nest_value(h); auto nest = get_nest_value(h);
if (__builtin_expect(nest != kNestCollision, 1)) { if (__builtin_expect(nest != kNestCollision, 1)) {
auto vit = values_.begin() + index_.cuckoo_hash(h, nest); auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest));
if (equal_(k, vit->first)) { if (equal_(k, vit->first)) {
fprintf(stderr, "fast\n"); ++fast_;
return make_iterator(vit); return make_iterator(vit);
} }
} }
nest = index_.cuckoo_nest(h); nest = index_.cuckoo_nest(h);
fprintf(stderr, "slow\n"); ++slow_;
// assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest));
return slow_find(k, index_.cuckoo_hash(h, nest)); return slow_find(k, index_.cuckoo_hash(h, nest));
} }