Interesting point, but get_cuckoo_nest is adding a lot and fast path is not that fast for int64.
This commit is contained in:
parent
e3ccde3ba0
commit
7fe9527459
|
@ -93,8 +93,8 @@ int main(int argc, char** argv) {
|
||||||
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
Benchmark::Register(new BM_CreateUrls<unordered_map<StringPiece, StringPiece>>("URLS100k"));
|
||||||
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||||
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0));
|
||||||
Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
// Benchmark::Register(new BM_SearchUrls<mph_map<StringPiece, StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||||
Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
// Benchmark::Register(new BM_SearchUrls<unordered_map<StringPiece, StringPiece, Murmur3StringPiece>>("URLS100k", 10*1000 * 1000, 0.9));
|
||||||
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
|
Benchmark::Register(new BM_SearchUint64<mph_map<uint64_t, uint64_t>>);
|
||||||
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
|
Benchmark::Register(new BM_SearchUint64<unordered_map<uint64_t, uint64_t>>);
|
||||||
Benchmark::RunAll();
|
Benchmark::RunAll();
|
||||||
|
|
|
@ -120,8 +120,10 @@ class mph_map {
|
||||||
}
|
}
|
||||||
uint32_t get_nest_index(const uint32_t* h) const {
|
uint32_t get_nest_index(const uint32_t* h) const {
|
||||||
assert(nests_.size());
|
assert(nests_.size());
|
||||||
return h[3] % nests_.size(); // a mod 2^n == a & 2^n - 1
|
assert(nests_.size() % 2 == 0);
|
||||||
// return h[3] & (nests_.size() - 1); // a mod 2^n == a & 2^n - 1
|
assert((nests_.size() & (nests_.size() - 1)) == 0);
|
||||||
|
assert((h[3] % nests_.size()) == (h[3] & (nests_.size() - 1)));
|
||||||
|
return (h[3] & (nests_.size() - 1)); // a mod 2^n == a & 2^n - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
void pack();
|
void pack();
|
||||||
|
@ -135,7 +137,9 @@ class mph_map {
|
||||||
size_type size_;
|
size_type size_;
|
||||||
|
|
||||||
mutable uint64_t fast_;
|
mutable uint64_t fast_;
|
||||||
|
mutable uint64_t fast_taken_;
|
||||||
mutable uint64_t slow_;
|
mutable uint64_t slow_;
|
||||||
|
mutable uint64_t very_slow_;
|
||||||
};
|
};
|
||||||
|
|
||||||
MPH_MAP_TMPL_SPEC
|
MPH_MAP_TMPL_SPEC
|
||||||
|
@ -149,7 +153,7 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
|
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
|
||||||
fprintf(stderr, "Fast: %d Slow %d ratio %f\n", fast_, slow_, fast_*1.0/slow_);
|
fprintf(stderr, "Fast taken: %d Fast: %d Slow %d very_slow %d ratio %f\n", fast_taken_, fast_, slow_, very_slow_, fast_*1.0/slow_);
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
||||||
|
@ -169,10 +173,15 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
||||||
slack_.insert(make_pair(x.first, values_.size() - 1));
|
slack_.insert(make_pair(x.first, values_.size() - 1));
|
||||||
if (should_pack) pack();
|
if (should_pack) pack();
|
||||||
it = find(x.first);
|
it = find(x.first);
|
||||||
|
slow_ = 0;
|
||||||
|
very_slow_ = 0;
|
||||||
|
fast_ = 0;
|
||||||
|
fast_taken_ = 0;
|
||||||
return make_pair(it, true);
|
return make_pair(it, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(void_type, pack)() {
|
MPH_MAP_METHOD_DECL(void_type, pack)() {
|
||||||
|
// fprintf(stderr, "Paki %d values\n", values_.size());
|
||||||
if (values_.empty()) return;
|
if (values_.empty()) return;
|
||||||
assert(std::unordered_set<key_type>(make_iterator_first(begin()), make_iterator_first(end())).size() == size());
|
assert(std::unordered_set<key_type>(make_iterator_first(begin()), make_iterator_first(end())).size() == size());
|
||||||
bool success = index_.Reset(
|
bool success = index_.Reset(
|
||||||
|
@ -183,7 +192,7 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
|
||||||
new_values.reserve(new_values.size() * 2);
|
new_values.reserve(new_values.size() * 2);
|
||||||
std::vector<bool> new_present(index_.perfect_hash_size(), false);
|
std::vector<bool> new_present(index_.perfect_hash_size(), false);
|
||||||
new_present.reserve(new_present.size() * 2);
|
new_present.reserve(new_present.size() * 2);
|
||||||
auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*10 + 1);
|
auto new_nests_size = nextpoweroftwo(ceil(new_values.size())*100 + 1);
|
||||||
dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_);
|
dynamic_2bitset(new_nests_size, true /* fill with 1s */).swap(nests_);
|
||||||
vector<bool> used_nests(nests_.size());
|
vector<bool> used_nests(nests_.size());
|
||||||
uint32_t collisions = 0;
|
uint32_t collisions = 0;
|
||||||
|
@ -208,15 +217,24 @@ MPH_MAP_METHOD_DECL(void_type, pack)() {
|
||||||
used_nests[get_nest_index(h)] = true;
|
used_nests[get_nest_index(h)] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
|
||||||
uint32_t h[4];
|
|
||||||
index_.hash_vector(it->first, h);
|
|
||||||
assert(get_nest_value(h) == kNestCollision || index_.perfect_hash(it->first) == index_.cuckoo_hash(h, get_nest_value(h)));
|
|
||||||
}
|
|
||||||
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
|
// fprintf(stderr, "Collision ratio: %f\n", collisions*1.0/size());
|
||||||
values_.swap(new_values);
|
values_.swap(new_values);
|
||||||
present_.swap(new_present);
|
present_.swap(new_present);
|
||||||
slack_type().swap(slack_);
|
slack_type().swap(slack_);
|
||||||
|
int32_t fast = 0;
|
||||||
|
int32_t slow= 0;
|
||||||
|
for (iterator it = begin(), it_end = end(); it != it_end; ++it) {
|
||||||
|
uint32_t h[4];
|
||||||
|
index_.hash_vector(it->first, h);
|
||||||
|
if (get_nest_value(h) == kNestCollision) ++slow;
|
||||||
|
else {
|
||||||
|
++fast;
|
||||||
|
auto cit = values_.begin() + index_.cuckoo_hash(h, get_nest_value(h));
|
||||||
|
assert(index_.perfect_hash(it->first) == cit - values_.begin());
|
||||||
|
assert(equal_(it->first, cit->first));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// fprintf(stderr, "Predicted fast: %d slow %d\n", fast, slow);
|
||||||
}
|
}
|
||||||
|
|
||||||
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
|
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
|
||||||
|
@ -231,7 +249,7 @@ MPH_MAP_METHOD_DECL(void_type, clear)() {
|
||||||
present_.clear();
|
present_.clear();
|
||||||
slack_.clear();
|
slack_.clear();
|
||||||
index_.clear();
|
index_.clear();
|
||||||
dynamic_2bitset(1, true /* fill with 1s */).swap(nests_);
|
dynamic_2bitset(8, true /* fill with 1s */).swap(nests_);
|
||||||
size_ = 0;
|
size_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -254,7 +272,10 @@ MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
||||||
index_.hash_vector(k, h);
|
index_.hash_vector(k, h);
|
||||||
auto nest = get_nest_value(h);
|
auto nest = get_nest_value(h);
|
||||||
if (__builtin_expect(nest != kNestCollision, 1)) {
|
if (__builtin_expect(nest != kNestCollision, 1)) {
|
||||||
|
++fast_taken_;
|
||||||
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
||||||
|
// do not hold for unknown keys
|
||||||
|
assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first));
|
||||||
if (equal_(k, vit->first)) {
|
if (equal_(k, vit->first)) {
|
||||||
++fast_;
|
++fast_;
|
||||||
return make_iterator(vit);
|
return make_iterator(vit);
|
||||||
|
@ -273,6 +294,7 @@ MPH_MAP_METHOD_DECL(const_iterator, slow_find)(const key_type& k, uint32_t perfe
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (__builtin_expect(!slack_.empty(), 0)) {
|
if (__builtin_expect(!slack_.empty(), 0)) {
|
||||||
|
++very_slow_;
|
||||||
auto sit = slack_.find(k);
|
auto sit = slack_.find(k);
|
||||||
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
|
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
|
||||||
}
|
}
|
||||||
|
@ -284,7 +306,9 @@ MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
||||||
index_.hash_vector(k, h);
|
index_.hash_vector(k, h);
|
||||||
auto nest = get_nest_value(h);
|
auto nest = get_nest_value(h);
|
||||||
if (__builtin_expect(nest != kNestCollision, 1)) {
|
if (__builtin_expect(nest != kNestCollision, 1)) {
|
||||||
|
++fast_taken_;
|
||||||
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
auto vit = values_.begin() + index_.cuckoo_hash(h, nest);
|
||||||
|
assert(values_.size() != index_.perfect_hash_size() || equal_(k, vit->first));
|
||||||
if (equal_(k, vit->first)) {
|
if (equal_(k, vit->first)) {
|
||||||
++fast_;
|
++fast_;
|
||||||
return make_iterator(vit);
|
return make_iterator(vit);
|
||||||
|
@ -303,6 +327,7 @@ MPH_MAP_METHOD_DECL(iterator, slow_find)(const key_type& k, uint32_t perfect_has
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (__builtin_expect(!slack_.empty(), 0)) {
|
if (__builtin_expect(!slack_.empty(), 0)) {
|
||||||
|
++very_slow_;
|
||||||
auto sit = slack_.find(k);
|
auto sit = slack_.find(k);
|
||||||
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
|
if (sit != slack_.end()) return make_iterator(values_.begin() + sit->second);
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,17 +15,20 @@ int main(int argc, char** argv) {
|
||||||
for (int i = 0; i < num_keys; ++i) {
|
for (int i = 0; i < num_keys; ++i) {
|
||||||
b.insert(make_pair(i, i));
|
b.insert(make_pair(i, i));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < num_keys; ++i) {
|
b.rehash(b.size());
|
||||||
auto it = b.find(i);
|
fprintf(stderr, "Insertion finished\n");
|
||||||
|
for (int i = 0; i < 1000000; ++i) {
|
||||||
|
auto it = b.find(i % num_keys);
|
||||||
if (it == b.end()) {
|
if (it == b.end()) {
|
||||||
std::cerr << "Failed to find " << i << std::endl;
|
std::cerr << "Failed to find " << i << std::endl;
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
if (it->first != it->second || it->first != i) {
|
if (it->first != it->second || it->first != i % num_keys) {
|
||||||
std::cerr << "Found " << it->first << " looking for " << i << std::endl;
|
std::cerr << "Found " << it->first << " looking for " << i << std::endl;
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
mph_map<string, int> h;
|
mph_map<string, int> h;
|
||||||
h.insert(std::make_pair("-1",-1));
|
h.insert(std::make_pair("-1",-1));
|
||||||
mph_map<string, int>::const_iterator it;
|
mph_map<string, int>::const_iterator it;
|
||||||
|
@ -55,4 +58,5 @@ int main(int argc, char** argv) {
|
||||||
if (key < num_valid && it->second != key) exit(-1);
|
if (key < num_valid && it->second != key) exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue