1
Fork 0
turbonss/cxxmph/mph_map.h

201 lines
6.3 KiB
C
Raw Normal View History

2011-12-05 20:03:10 +02:00
#ifndef __CXXMPH_MPH_MAP_H__
#define __CXXMPH_MPH_MAP_H__
2011-11-05 19:15:11 +02:00
// Implementation of the unordered associative mapping interface using a
// minimal perfect hash function.
//
// This class is about 20% to 100% slower than unordered_map (or ext/hash_map)
// and should not be used if performance is a concern. In fact, you should only
// use it for educational purposes.
2010-11-08 22:19:44 +02:00
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <memory>
#include <unordered_map>
#include <utility> // for std::pair
#include <vector>

#include "MurmurHash2.h"
#include "mph_index.h"
2010-10-29 09:26:37 +03:00
namespace cxxmph {
2011-12-10 03:57:37 +02:00
using std::pair;
using std::make_pair;
2011-11-10 20:44:37 +02:00
using std::unordered_map;
2011-12-10 03:57:37 +02:00
using std::vector;
2011-05-24 03:18:24 +03:00
2010-06-28 22:01:18 +03:00
// Save on repetitive typing.
2011-05-16 02:47:42 +03:00
// Template header repeated in front of every out-of-line mph_map definition.
#define MPH_MAP_TMPL_SPEC template <class Key, class Data, class HashFcn, class EqualKey, class Alloc>
// The fully parameterized class name matching MPH_MAP_TMPL_SPEC.
#define MPH_MAP_CLASS_SPEC mph_map<Key, Data, HashFcn, EqualKey, Alloc>
// Opens an out-of-line member definition: `r` is a type nested in mph_map
// (hence the void_type/bool_type/my_int32_t helper typedefs in the class) and
// `m` is the member name. The parameter list and body follow the macro call.
#define MPH_MAP_METHOD_DECL(r, m) MPH_MAP_TMPL_SPEC typename MPH_MAP_CLASS_SPEC::r MPH_MAP_CLASS_SPEC::m
2010-06-28 22:01:18 +03:00
2011-11-10 20:44:37 +02:00
template <class Key, class Data, class HashFcn = std::hash<Key>, class EqualKey = std::equal_to<Key>, class Alloc = std::allocator<Data> >
2011-05-16 02:47:42 +03:00
class mph_map {
2010-06-28 22:01:18 +03:00
public:
typedef Key key_type;
typedef Data data_type;
2011-12-10 03:57:37 +02:00
typedef pair<Key, Data> value_type;
2010-06-28 22:01:18 +03:00
typedef HashFcn hasher;
typedef EqualKey key_equal;
typedef typename std::vector<value_type>::pointer pointer;
typedef typename std::vector<value_type>::reference reference;
typedef typename std::vector<value_type>::const_reference const_reference;
typedef typename std::vector<value_type>::size_type size_type;
typedef typename std::vector<value_type>::difference_type difference_type;
typedef typename std::vector<value_type>::iterator iterator;
typedef typename std::vector<value_type>::const_iterator const_iterator;
// For making macros simpler.
typedef void void_type;
typedef bool bool_type;
2011-12-10 03:57:37 +02:00
typedef pair<iterator, bool> insert_return_type;
2010-06-28 22:01:18 +03:00
2011-05-16 02:47:42 +03:00
mph_map();
~mph_map();
2010-06-28 22:01:18 +03:00
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
size_type size() const;
bool empty() const;
void clear();
void erase(iterator pos);
void erase(const key_type& k);
2011-12-10 03:57:37 +02:00
pair<iterator, bool> insert(const value_type& x);
2010-06-28 22:01:18 +03:00
iterator find(const key_type& k);
const_iterator find(const key_type& k) const;
2011-12-10 03:57:37 +02:00
typedef int32_t my_int32_t; // help macros
2011-06-14 08:24:40 +03:00
int32_t index(const key_type& k) const;
2010-06-28 22:01:18 +03:00
data_type& operator[](const key_type &k);
2011-06-14 08:24:40 +03:00
const data_type& operator[](const key_type &k) const;
2010-06-28 22:01:18 +03:00
2011-05-24 03:18:24 +03:00
size_type bucket_count() const { return size(); }
2011-11-05 19:15:11 +02:00
// FIXME: not sure if this has the semantics I want
2011-05-24 03:18:24 +03:00
void rehash(size_type nbuckets /*ignored*/) { pack(); }
2010-06-28 22:01:18 +03:00
2011-06-14 08:24:40 +03:00
protected: // mimicking STL implementation
EqualKey equal_;
2010-06-28 22:01:18 +03:00
private:
2010-11-05 08:40:15 +02:00
template <typename iterator>
struct iterator_first : public iterator {
iterator_first(iterator it) : iterator(it) { }
const typename iterator::value_type::first_type& operator*() const {
return this->iterator::operator*().first;
}
};
template <typename iterator>
iterator_first<iterator> make_iterator_first(iterator it) {
return iterator_first<iterator>(it);
}
2011-05-24 03:18:24 +03:00
void pack();
2010-11-05 08:40:15 +02:00
std::vector<value_type> values_;
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
2010-11-08 22:19:44 +02:00
// TODO(davi) optimize slack to no hold a copy of the key
2011-05-24 03:18:24 +03:00
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
2010-11-05 08:40:15 +02:00
slack_type slack_;
2010-06-28 22:01:18 +03:00
};
2011-05-16 02:47:42 +03:00
// Two maps compare equal when they hold the same number of entries and the
// entries are pairwise equal in iteration order. Because the comparison is
// positional, maps with the same contents but a different storage order
// compare unequal.
//
// Only the public interface is used: values_ is private and this non-member
// operator is not a friend of mph_map, so it cannot touch the vector directly.
MPH_MAP_TMPL_SPEC
bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
  return lhs.size() == rhs.size() &&
         std::equal(lhs.begin(), lhs.end(), rhs.begin());
}
2011-05-16 02:47:42 +03:00
// Builds an empty map. The pack() call returns immediately because there are
// no values yet.
MPH_MAP_TMPL_SPEC
MPH_MAP_CLASS_SPEC::mph_map() {
  pack();
}
2011-05-16 02:47:42 +03:00
// Nothing to release by hand: every member cleans up after itself.
MPH_MAP_TMPL_SPEC
MPH_MAP_CLASS_SPEC::~mph_map() {}
2011-05-16 02:47:42 +03:00
// Inserts x unless an entry with the same key already exists.
//
// Returns (position of the existing entry, false) when the key is present.
// Otherwise x is appended to values_, its position is recorded in slack_, and
// the result is (position of the new entry, true). The map is repacked when
// slack_ has grown as large as the indexed part, or when 256 entries have
// accumulated and no index has been built yet.
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
  iterator existing = find(x.first);
  if (existing != end()) {
    return make_pair(existing, false);
  }

  values_.push_back(x);
  slack_.insert(make_pair(x.first, values_.size() - 1));

  const bool slack_matches_index = slack_.size() == index_.size();
  const bool first_index_due = slack_.size() >= 256 && index_.size() == 0;
  if (slack_matches_index || first_index_due) {
    pack();
  }

  // pack() may have moved the new entry, so look it up again.
  return make_pair(find(x.first), true);
}
2011-05-24 03:18:24 +03:00
// Rebuilds the perfect hash over every key currently stored and moves each
// entry to the slot the new hash assigns to its key. Afterwards slack_ is
// empty and all lookups go through index_. A map without entries is left
// untouched.
MPH_MAP_METHOD_DECL(void_type, pack)() {
  if (values_.empty()) return;

  {
    // Swap with a fresh map so the old slack contents are released right away.
    slack_type discarded;
    discarded.swap(slack_);
  }

  bool success = index_.Reset(make_iterator_first(values_.begin()),
                              make_iterator_first(values_.end()));
  assert(success);

  std::vector<value_type> packed(values_.size());
  for (size_type i = 0; i < values_.size(); ++i) {
    const size_type slot = index_.index(values_[i].first);
    assert(slot < packed.size());
    packed[slot] = values_[i];
  }
  values_.swap(packed);
}
2011-05-16 02:47:42 +03:00
// The accessors below forward straight to the backing vector.

// First entry in storage order.
MPH_MAP_METHOD_DECL(iterator, begin)() {
  return values_.begin();
}

// One past the last entry.
MPH_MAP_METHOD_DECL(iterator, end)() {
  return values_.end();
}

// Read-only counterpart of begin().
MPH_MAP_METHOD_DECL(const_iterator, begin)() const {
  return values_.begin();
}

// Read-only counterpart of end().
MPH_MAP_METHOD_DECL(const_iterator, end)() const {
  return values_.end();
}

// True when the map holds no entries.
MPH_MAP_METHOD_DECL(bool_type, empty)() const {
  return values_.empty();
}

// Number of entries held.
MPH_MAP_METHOD_DECL(size_type, size)() const {
  return values_.size();
}
2010-06-28 22:01:18 +03:00
2011-05-16 02:47:42 +03:00
// Empties the map: both lookup structures are reset along with the entries
// they refer to.
MPH_MAP_METHOD_DECL(void_type, clear)() {
  index_.clear();
  slack_.clear();
  values_.clear();
}
2011-05-16 02:47:42 +03:00
// Removes the entry at pos. pos must refer to an element of this map.
//
// Removing from the middle of values_ shifts later entries, so the positions
// recorded in slack_ and implied by index_ are stale afterwards and the
// lookup structures have to be rebuilt.
MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
  values_.erase(pos);
  if (values_.empty()) {
    // pack() returns early on an empty map and would leave index_ and slack_
    // describing entries that no longer exist; a later find() would then
    // index into an empty vector. Reset everything instead.
    clear();
    return;
  }
  pack();
}
2011-05-16 02:47:42 +03:00
// Removes the entry stored under k, if there is one.
MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
  iterator victim = find(k);
  if (victim != end()) {
    erase(victim);
  }
}
2011-05-16 02:47:42 +03:00
// Read-only lookup. Entries added since the last pack() are searched through
// slack_ first; everything else is located through index_. Because index_ is
// also asked about keys it was not built from, the candidate it points at is
// confirmed with equal_ before being returned. Returns end() when k is absent.
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
  const bool has_slack = !slack_.empty();
  if (__builtin_expect(has_slack, 0)) {
    typename slack_type::const_iterator pending = slack_.find(k);
    if (pending != slack_.end()) {
      return values_.begin() + pending->second;
    }
  }

  const bool no_index = index_.size() == 0;
  if (__builtin_expect(no_index, 0)) {
    return end();
  }

  const_iterator candidate = values_.begin() + index_.index(k);
  if (__builtin_expect(equal_(k, candidate->first), 1)) {
    return candidate;
  }
  return end();
}
2011-06-14 08:24:40 +03:00
2011-05-16 02:47:42 +03:00
// Mutable lookup; same strategy as the const overload. slack_ is consulted
// for entries not yet covered by the index, then index_ proposes a slot whose
// key is verified with equal_. Returns end() when k is absent.
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
  if (!slack_.empty()) {
    typename slack_type::const_iterator pending = slack_.find(k);
    if (pending != slack_.end()) {
      return values_.begin() + pending->second;
    }
  }

  if (index_.size() != 0) {
    iterator candidate = values_.begin() + index_.index(k);
    if (equal_(candidate->first, k)) {
      return candidate;
    }
  }
  return end();
}
2011-06-14 08:24:40 +03:00
// Returns the value the perfect hash assigns to k, or -1 when no index has
// been built. Must only be called when there are no unpacked entries, which
// is asserted.
MPH_MAP_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
  assert(slack_.empty());
  if (index_.size() != 0) {
    return index_.index(k);
  }
  return -1;
}
2011-05-16 02:47:42 +03:00
// Returns a reference to the data stored under k. When k is absent, an entry
// holding a default-constructed data_type is inserted first; an existing
// entry is left unchanged.
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
  insert_return_type slot = insert(make_pair(k, data_type()));
  return slot.first->second;
}
2010-10-29 09:26:37 +03:00
} // namespace cxxmph
2011-12-05 20:03:10 +02:00
#endif // __CXXMPH_MPH_MAP_H__