1
Fork 0

Interesting point, but get_cuckoo_nest is adding a lot of overhead, and the fast path is not that fast for int64.

This commit is contained in:
Davi Reis 2012-03-14 21:22:40 -03:00
parent e3ccde3ba0
commit 7fe9527459
3 changed files with 44 additions and 15 deletions

View File

@ -93,8 +93,8 @@ int main(int argc, char** argv) {
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
// Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
// Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
Benchmark::RunAll();

View File

@ -120,8 +120,10 @@ class mph_map {
}
// Maps the fourth hash word (h[3]) onto a slot index in nests_.
//
// nests_.size() is required to be a non-zero, even power of two (checked by
// the assertions below), which lets the modulo be computed with a bit mask:
//   a mod 2^n == a & (2^n - 1)
//
// @param h  hash words for the key; only h[3] is read here.
// @return   h[3] reduced into the range [0, nests_.size()).
uint32_t get_nest_index(const uint32_t* h) const {
  const auto nest_count = nests_.size();
  assert(nest_count);
  assert(nest_count % 2 == 0);
  assert((nest_count & (nest_count - 1)) == 0);
  // Sanity check that the mask really is equivalent to the modulo.
  assert((h[3] % nest_count) == (h[3] & (nest_count - 1)));
  return (h[3] & (nest_count - 1));
}
void pack();
@ -135,7 +137,9 @@ class mph_map {
size_type size_;
mutable uint64_t fast_;
mutable uint64_t fast_taken_;
mutable uint64_t slow_;
mutable uint64_t very_slow_;
};
MPH_MAP_TMPL_SPEC
@ -149,7 +153,7 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
}
// Destructor: writes the lookup-path diagnostic counters to stderr.
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
  // The counters are uint64_t. Passing them to "%d" is undefined behavior,
  // so convert explicitly and print them with "%llu".
  // Note: the ratio is a floating-point division, so a zero slow_ count
  // prints inf/nan rather than trapping.
  fprintf(stderr,
          "Fast taken: %llu Fast: %llu Slow %llu very_slow %llu ratio %f\n",
          static_cast<unsigned long long>(fast_taken_),
          static_cast<unsigned long long>(fast_),
          static_cast<unsigned long long>(slow_),
          static_cast<unsigned long long>(very_slow_),
          fast_ * 1.0 / slow_);
}
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
@ -169,10 +173,15 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
slack_.insert(make_pair(x.first, values_.size() - 1));
if (should_pack) pack();
it = find(x.first);
slow_ = 0;
very_slow_ = 0;
fast_ = 0;
fast_taken_ = 0;
return make_pair(it, true);
}
MPH_MAP_METHOD_DECL(void_type, pack)() {
// fprintf(stderr, "Paki %d values\n", values_.size());
if (values_.empty()) return;
assert(std::unordered_set<key_type>(make_iterator_first(begin()), make_iterator_first(end())).size() == size());
bool success = index_.Reset(
@ -183,7 +192,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
new_values.reserve(new_values.size() * 2);
std::vector<bool> new_present(index_.perfect_hash_size(), false);
new_present.reserve(new_present.size() * 2);
auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10 + 1);
auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*100 + 1);
dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_);
vector<bool> used_nests(nests_.size());
uint32_t collisions = 0;
@ -208,15 +217,24 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
used_nests[get_nest_index(h)] = true;
}
}
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
uint32_t h[4];
index_.hash_vector(it->first, h);
assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h)));
}
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
values_.swap(new_values);
present_.swap(new_present);
slack_type().swap(slack_);
int32_t fast = 0;
int32_t slow= 0;
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
uint32_t h[4];
index_.hash_vector(it->first, h);
if (get_nest_value(h) == kNestCollision) ++slow;
else {
++fast;
auto cit = values_.begin() + index_.cuckoo_hash(h, get_nest_value(h));
assert(index_.perfect_hash(it->first) == cit - values_.begin());
assert(equal_(it->first, cit->first));
}
}
// fprintf(stderr, "Predicted fast: %d slow %d\n", fast, slow);
}
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
@ -231,7 +249,7 @@ MPH_MAP_METHOD_DECL(void_type, clear)() {
present_.clear();
slack_.clear();
index_.clear();
dynamic_2bitset(1, true /* fill with 1s */).swap(nests_);
dynamic_2bitset(8, true /* fill with 1s */).swap(nests_);
size_ = 0;
}
@ -254,7 +272,10 @@ MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
index_.hash_vector(k, h);
auto nest = get_nest_value(h);
if (__builtin_expect(nest != kNestCollision, 1)) {
++fast_taken_;
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
// This does not hold for unknown keys.
assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first));
if (equal_(k, vit->first)) {
++fast_;
return make_iterator(vit);
@ -273,6 +294,7 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe
}
}
if (__builtin_expect(!slack_.empty(), 0)) {
++very_slow_;
auto sit = slack_.find(k);
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
}
@ -284,7 +306,9 @@ MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
index_.hash_vector(k, h);
auto nest = get_nest_value(h);
if (__builtin_expect(nest != kNestCollision, 1)) {
++fast_taken_;
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first));
if (equal_(k, vit->first)) {
++fast_;
return make_iterator(vit);
@ -303,6 +327,7 @@ MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_has
}
}
if (__builtin_expect(!slack_.empty(), 0)) {
++very_slow_;
auto sit = slack_.find(k);
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
}

View File

@ -15,17 +15,20 @@ int main(int argc, char** argv) {
for (int i = 0; i < num_keys; ++i) {
b.insert(make_pair(i, i));
}
for (int i = 0; i < num_keys; ++i) {
auto it = b.find(i);
b.rehash(b.size());
fprintf(stderr, "Insertion finished\n");
for (int i = 0; i < 1000000; ++i) {
auto it = b.find(i % num_keys);
if (it == b.end()) {
std::cerr << "Failed to find " << i << std::endl;
exit(-1);
}
if (it->first != it->second || it->first != i) {
if (it->first != it->second || it->first != i % num_keys) {
std::cerr << "Found " << it->first << " looking for " << i << std::endl;
exit(-1);
}
}
/*
mph_map<string, int> h;
h.insert(std::make_pair("-1",-1));
mph_map<string, int>::const_iterator it;
@ -55,4 +58,5 @@ int main(int argc, char** argv) {
if (key < num_valid && it->second != key) exit(-1);
}
}
*/
}