Working, but it sucks.
This commit is contained in:
parent
b96b71961d
commit
e3ccde3ba0
@ -4,6 +4,7 @@
|
|||||||
#include <stdint.h> // for uint32_t and friends
|
#include <stdint.h> // for uint32_t and friends
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
#include <cmath>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
@ -13,47 +14,38 @@ namespace cxxmph {
|
|||||||
|
|
||||||
class dynamic_2bitset {
|
class dynamic_2bitset {
|
||||||
public:
|
public:
|
||||||
dynamic_2bitset() : data_(NULL), size_(0), one_initialized_(false) {}
|
dynamic_2bitset() : fill_(false) {}
|
||||||
dynamic_2bitset(uint32_t size, bool one_initialized = false)
|
dynamic_2bitset(uint32_t size, bool fill = false)
|
||||||
: data_(NULL), size_(0), one_initialized_(one_initialized) {
|
: size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {
|
||||||
resize(size);
|
|
||||||
}
|
}
|
||||||
~dynamic_2bitset() { delete [] data_; }
|
|
||||||
|
|
||||||
const uint8_t operator[](uint32_t i) const { return get(i); }
|
const uint8_t operator[](uint32_t i) const { return get(i); }
|
||||||
uint8_t get(uint32_t i) const {
|
uint8_t get(uint32_t i) const {
|
||||||
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
|
return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
|
||||||
}
|
}
|
||||||
uint8_t set(uint32_t i, uint8_t v) {
|
uint8_t set(uint32_t i, uint8_t v) {
|
||||||
uint8_t sf = ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
|
data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
|
||||||
fprintf(stderr, "v %d sf %d\n", v, sf);
|
|
||||||
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
|
data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
|
||||||
|
assert(v <= 3);
|
||||||
assert(get(i) == v);
|
assert(get(i) == v);
|
||||||
}
|
}
|
||||||
void resize(uint32_t size) {
|
void resize(uint32_t size) {
|
||||||
uint8_t* new_data = new uint8_t[size << 2];
|
|
||||||
assert(one_initialized_);
|
|
||||||
assert(one_initialized_ * ones() == ones());
|
|
||||||
memset(new_data, one_initialized_*ones(), size << 2);
|
|
||||||
assert(new_data[0] == ones());
|
|
||||||
uint8_t* old_data_ = data_;
|
|
||||||
for (int i = 0; i < size_; ++i) {
|
|
||||||
data_ = old_data_;
|
|
||||||
auto v = get(i);
|
|
||||||
data_ = new_data;
|
|
||||||
set(i, v);
|
|
||||||
}
|
|
||||||
size_ = size;
|
size_ = size;
|
||||||
delete [] old_data_;
|
data_.resize(size >> 2, fill_*ones());
|
||||||
data_ = new_data;
|
|
||||||
assert(data_[0] == ones());
|
|
||||||
assert(get(0) == 3);
|
|
||||||
}
|
}
|
||||||
|
void swap(dynamic_2bitset& other) {
|
||||||
|
std::swap(other.size_, size_);
|
||||||
|
std::swap(other.fill_, fill_);
|
||||||
|
std::swap(other.data_, data_);
|
||||||
|
}
|
||||||
|
void clear() { data_.clear(); }
|
||||||
|
|
||||||
|
uint32_t size() const { return size_; }
|
||||||
static const uint8_t vmask[];
|
static const uint8_t vmask[];
|
||||||
private:
|
private:
|
||||||
uint8_t* data_;
|
|
||||||
uint32_t size_;
|
uint32_t size_;
|
||||||
bool one_initialized_;
|
bool fill_;
|
||||||
|
std::vector<uint8_t> data_;
|
||||||
uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
|
uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -5,6 +5,15 @@
|
|||||||
|
|
||||||
using cxxmph::dynamic_2bitset;
|
using cxxmph::dynamic_2bitset;
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
dynamic_2bitset small(256, true);
|
||||||
|
for (int i = 0; i < small.size(); ++i) small.set(i, i % 4);
|
||||||
|
for (int i = 0; i < small.size(); ++i) {
|
||||||
|
if (small[i] != i % 4) {
|
||||||
|
fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int size = 256;
|
int size = 256;
|
||||||
dynamic_2bitset bits(size, true /* fill with ones */);
|
dynamic_2bitset bits(size, true /* fill with ones */);
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
@ -27,6 +36,14 @@ int main(int argc, char** argv) {
|
|||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
dynamic_2bitset size_corner1(1);
|
||||||
|
if (size_corner1.size() != 1) exit(-1);
|
||||||
|
dynamic_2bitset size_corner2(2);
|
||||||
|
if (size_corner2.size() != 2) exit(-1);
|
||||||
|
(dynamic_2bitset(4)).swap(size_corner2);
|
||||||
|
if (size_corner2.size() != 4) exit(-1);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <utility> // for std::pair
|
#include <utility> // for std::pair
|
||||||
|
|
||||||
|
#include "mph_bits.h"
|
||||||
#include "mph_index.h"
|
#include "mph_index.h"
|
||||||
#include "hollow_iterator.h"
|
#include "hollow_iterator.h"
|
||||||
|
|
||||||
@ -107,29 +108,34 @@ class mph_map {
|
|||||||
static const uint8_t kNestCollision = 3; // biggest 2 bit value
|
static const uint8_t kNestCollision = 3; // biggest 2 bit value
|
||||||
void set_nest_value(const uint32_t* h, uint8_t value) {
|
void set_nest_value(const uint32_t* h, uint8_t value) {
|
||||||
auto index = get_nest_index(h);
|
auto index = get_nest_index(h);
|
||||||
assert(get_nest_index(h) < nests_.size() * 4);
|
assert(get_nest_index(h) < nests_.size());
|
||||||
assert(get_nest_index(h) >> 2 < nests_.size());
|
assert(get_nest_index(h) >> 2 < nests_.size());
|
||||||
assert(value < 4);
|
assert(value < 4);
|
||||||
set_2bit_value(&nests_[0], index, value);
|
nests_.set(index, value);
|
||||||
assert(get_2bit_value(&nests_[0], index) == value);
|
assert(nests_[index] == value);
|
||||||
}
|
}
|
||||||
uint32_t get_nest_value(const uint32_t* h) const {
|
uint32_t get_nest_value(const uint32_t* h) const {
|
||||||
assert(get_nest_index(h) < nests_.size() * 4);
|
assert(get_nest_index(h) < nests_.size());
|
||||||
return get_2bit_value(&(nests_[0]), get_nest_index(h));
|
return nests_[get_nest_index(h)];
|
||||||
}
|
}
|
||||||
uint32_t get_nest_index(const uint32_t* h) const {
|
uint32_t get_nest_index(const uint32_t* h) const {
|
||||||
return h[3] & ((nests_.size() << 2) - 1);
|
assert(nests_.size());
|
||||||
|
return h[3] % nests_.size(); // a mod 2^n == a & 2^n - 1
|
||||||
|
// return h[3] & (nests_.size() - 1); // a mod 2^n == a & 2^n - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
void pack();
|
void pack();
|
||||||
std::vector<value_type> values_;
|
std::vector<value_type> values_;
|
||||||
std::vector<bool> present_;
|
std::vector<bool> present_;
|
||||||
std::vector<uint8_t> nests_;
|
dynamic_2bitset nests_;
|
||||||
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
|
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
|
||||||
// TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
|
// TODO(davi) optimize slack to hold 128 unique bits from hash64 as key
|
||||||
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
|
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
|
||||||
slack_type slack_;
|
slack_type slack_;
|
||||||
size_type size_;
|
size_type size_;
|
||||||
|
|
||||||
|
mutable uint64_t fast_;
|
||||||
|
mutable uint64_t slow_;
|
||||||
};
|
};
|
||||||
|
|
||||||
MPH_MAP_TMPL_SPEC
|
MPH_MAP_TMPL_SPEC
|
||||||
@ -143,6 +149,7 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
|
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
|
||||||
|
fprintf(stderr, "Fast: %d Slow %d ratio %f\n", fast_, slow_, fast_*1.0/slow_);
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
||||||
@ -176,11 +183,9 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
|
|||||||
new_values.reserve(new_values.size() * 2);
|
new_values.reserve(new_values.size() * 2);
|
||||||
std::vector<bool> new_present(index_.perfect_hash_size(), false);
|
std::vector<bool> new_present(index_.perfect_hash_size(), false);
|
||||||
new_present.reserve(new_present.size() * 2);
|
new_present.reserve(new_present.size() * 2);
|
||||||
auto new_nests_size = nextpoweroftwo(ceil(new_values.size() / 4.0) + 1)*10;
|
auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10 + 1);
|
||||||
std::vector<uint8_t> new_nests(new_nests_size, std::numeric_limits<uint8_t>::max());
|
dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_);
|
||||||
new_nests.reserve(new_nests.size() * 2);
|
vector<bool> used_nests(nests_.size());
|
||||||
nests_.swap(new_nests);
|
|
||||||
vector<bool> used_nests(nests_.size() * 4);
|
|
||||||
uint32_t collisions = 0;
|
uint32_t collisions = 0;
|
||||||
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
||||||
size_type id = index_.perfect_hash(it->first);
|
size_type id = index_.perfect_hash(it->first);
|
||||||
@ -194,6 +199,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
|
|||||||
if (used_nests[get_nest_index(h)]) {
|
if (used_nests[get_nest_index(h)]) {
|
||||||
set_nest_value(h, kNestCollision);
|
set_nest_value(h, kNestCollision);
|
||||||
assert(get_nest_value(h) == kNestCollision);
|
assert(get_nest_value(h) == kNestCollision);
|
||||||
|
// fprintf(stderr, "Collision at nest index %d among %d positions\n", get_nest_index(h), nests_.size());
|
||||||
++collisions;
|
++collisions;
|
||||||
} else {
|
} else {
|
||||||
set_nest_value(h, index_.cuckoo_nest(h));
|
set_nest_value(h, index_.cuckoo_nest(h));
|
||||||
@ -207,7 +213,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
|
|||||||
index_.hash_vector(it->first, h);
|
index_.hash_vector(it->first, h);
|
||||||
assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h)));
|
assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h)));
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
|
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
|
||||||
values_.swap(new_values);
|
values_.swap(new_values);
|
||||||
present_.swap(new_present);
|
present_.swap(new_present);
|
||||||
slack_type().swap(slack_);
|
slack_type().swap(slack_);
|
||||||
@ -225,8 +231,7 @@ MPH_MAP_METHOD_DECL(void_type, clear)() {
|
|||||||
present_.clear();
|
present_.clear();
|
||||||
slack_.clear();
|
slack_.clear();
|
||||||
index_.clear();
|
index_.clear();
|
||||||
nests_.clear();
|
dynamic_2bitset(1, true /* fill with 1s */).swap(nests_);
|
||||||
nests_.push_back(std::numeric_limits<uint8_t>::max());
|
|
||||||
size_ = 0;
|
size_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -245,19 +250,19 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
||||||
return slow_find(k, index_.perfect_hash(k));
|
|
||||||
/*
|
|
||||||
uint32_t h[4];
|
uint32_t h[4];
|
||||||
index_.hash_vector(k, h);
|
index_.hash_vector(k, h);
|
||||||
auto nest = get_nest_value(h);
|
auto nest = get_nest_value(h);
|
||||||
if (__builtin_expect(nest != kNestCollision, 1)) {
|
if (__builtin_expect(nest != kNestCollision, 1)) {
|
||||||
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
||||||
if (equal_(k, vit->first)) return make_iterator(vit);
|
if (equal_(k, vit->first)) {
|
||||||
|
++fast_;
|
||||||
|
return make_iterator(vit);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
nest = index_.cuckoo_nest(h);
|
nest = index_.cuckoo_nest(h);
|
||||||
assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest));
|
++slow_;
|
||||||
return slow_find(k, index_.cuckoo_hash(h, nest));
|
return slow_find(k, index_.cuckoo_hash(h, nest));
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const {
|
MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfect_hash) const {
|
||||||
@ -275,21 +280,18 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe
|
|||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
||||||
// return slow_find(k, index_.perfect_hash(k));
|
|
||||||
uint32_t h[4];
|
uint32_t h[4];
|
||||||
index_.hash_vector(k, h);
|
index_.hash_vector(k, h);
|
||||||
auto nest = get_nest_value(h);
|
auto nest = get_nest_value(h);
|
||||||
if (__builtin_expect(nest != kNestCollision, 1)) {
|
if (__builtin_expect(nest != kNestCollision, 1)) {
|
||||||
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
||||||
assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest));
|
|
||||||
if (equal_(k, vit->first)) {
|
if (equal_(k, vit->first)) {
|
||||||
fprintf(stderr, "fast\n");
|
++fast_;
|
||||||
return make_iterator(vit);
|
return make_iterator(vit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nest = index_.cuckoo_nest(h);
|
nest = index_.cuckoo_nest(h);
|
||||||
fprintf(stderr, "slow\n");
|
++slow_;
|
||||||
// assert(index_.perfect_hash(k) == index_.cuckoo_hash(h, nest));
|
|
||||||
return slow_find(k, index_.cuckoo_hash(h, nest));
|
return slow_find(k, index_.cuckoo_hash(h, nest));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user