Compiles, still need to fix size tracking.
This commit is contained in:
parent
c057fb882b
commit
238e384367
@ -1,12 +1,12 @@
|
||||
TESTS = $(check_PROGRAMS)
|
||||
check_PROGRAMS = mph_map_test mph_index_test trigraph_test
|
||||
check_PROGRAMS = hollow_iterator_test mph_map_test mph_index_test trigraph_test
|
||||
noinst_PROGRAMS = bm_index bm_map
|
||||
bin_PROGRAMS = cxxmph
|
||||
lib_LTLIBRARIES = libcxxmph.la
|
||||
libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mph_index.h mph_index.cc seeded_hash.h stringpiece.h benchmark.h benchmark.cc
|
||||
libcxxmph_la_LDFLAGS = -version-info 0:0:0
|
||||
cxxmph_includedir = $(includedir)/cxxmph/
|
||||
cxxmph_include_HEADERS = mph_map.h mph_index.h MurmurHash2.h trigraph.h seeded_hash.h stringpiece.h
|
||||
cxxmph_include_HEADERS = mph_map.h mph_index.h MurmurHash2.h trigraph.h seeded_hash.h stringpiece.h hollow_iterator.h
|
||||
|
||||
mph_map_test_LDADD = libcxxmph.la
|
||||
mph_map_test_SOURCES = mph_map_test.cc
|
||||
@ -25,3 +25,6 @@ bm_map_SOURCES = bm_common.cc bm_map.cc
|
||||
|
||||
cxxmph_LDADD = libcxxmph.la
|
||||
cxxmph_SOURCES = cxxmph.cc
|
||||
|
||||
hollow_iterator_test_SOURCES = hollow_iterator_test.cc
|
||||
|
||||
|
@ -21,7 +21,7 @@ class BM_MPHIndexCreate : public UrlsBenchmark {
|
||||
protected:
|
||||
virtual void Run() {
|
||||
SimpleMPHIndex<StringPiece> index;
|
||||
index.Reset(urls_.begin(), urls_.end());
|
||||
index.Reset(urls_.begin(), urls_.end(), urls_.size());
|
||||
}
|
||||
};
|
||||
|
||||
@ -53,7 +53,7 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
|
||||
protected:
|
||||
virtual bool SetUp () {
|
||||
if (!SearchUrlsBenchmark::SetUp()) return false;
|
||||
index_.Reset(urls_.begin(), urls_.end());
|
||||
index_.Reset(urls_.begin(), urls_.end(), urls_.size());
|
||||
return true;
|
||||
}
|
||||
SimpleMPHIndex<StringPiece> index_;
|
||||
|
@ -13,7 +13,8 @@ namespace cxxmph {
|
||||
// Looks up `k` in `mymap` and returns a pointer to the associated mapped
// value (`it->second`), or nullptr when `mymap.find(k)` reports end().
// The returned pointer refers to storage owned by `mymap`.
template<class MapType, class T>
const T* myfind(const MapType& mymap, const T& k) {
  auto it = mymap.find(k);
  // The end iterator is kept in a named local and compared exactly once.
  auto end = mymap.end();
  if (it == end) return nullptr;
  return &it->second;
}
|
||||
|
||||
|
@ -63,8 +63,8 @@ int main(int argc, char** argv) {
|
||||
for (int i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i];
|
||||
mph_map<string, string>::const_iterator it = table.begin();
|
||||
mph_map<string, string>::const_iterator end = table.end();
|
||||
for (; it != end; ++it) {
|
||||
cout << (it - table.begin()) << ": " << it->first
|
||||
for (int i = 0; it != end; ++it, ++i) {
|
||||
cout << i << ": " << it->first
|
||||
<<" -> " << it->second << endl;
|
||||
}
|
||||
}
|
||||
|
69
cxxmph/hollow_iterator.h
Normal file
69
cxxmph/hollow_iterator.h
Normal file
@ -0,0 +1,69 @@
|
||||
#ifndef __CXXMPH_HOLLOW_ITERATOR_H__
|
||||
#define __CXXMPH_HOLLOW_ITERATOR_H__
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Forward iterator adaptor that walks an underlying container while skipping
// every slot whose entry in a parallel "presence" container is false.
//
//   container_type  underlying container (may be const-qualified)
//   presence_type   container of flags, indexed by slot position
//   iterator_type   iterator (or const_iterator) of the underlying container
//
// The adaptor stores raw pointers to both containers and does not own them.
// Slot positions are computed as `it_ - c_->begin()`, so the wrapped iterator
// must support subtraction.
template <typename container_type, typename presence_type, typename iterator_type>
struct hollow_iterator_base {
  // Iterator traits are spelled out directly; the std::iterator helper base
  // class that used to provide them is deprecated since C++17.
  typedef std::forward_iterator_tag iterator_category;
  typedef typename container_type::value_type value_type;
  typedef typename iterator_type::difference_type difference_type;

  typedef presence_type presence;
  typedef container_type container;
  typedef iterator_type iterator;
  typedef hollow_iterator_base<container, presence, iterator>& self_reference;
  typedef typename iterator::reference reference;
  typedef typename iterator::pointer pointer;

  // Starts at `it`, immediately skipping forward to the first present slot
  // (or to the end of the container when no present slot remains).
  hollow_iterator_base(container* c, presence* p, iterator it)
      : c_(c), p_(p), it_(it) { find_present(); }

  // Pre-increment: moves to the next present slot and returns this iterator.
  self_reference operator++() {
    ++it_;
    find_present();
    return *this;  // flowing off the end of a non-void function is undefined
  }
  reference operator*() const { return *it_; }
  pointer operator->() const { return &(*it_); }

  // Two hollow iterators are equal when their wrapped iterators are equal.
  // TODO find syntax to make this less permissible at compile time
  template <class T>
  bool operator==(const T& rhs) const { return rhs.it_ == this->it_; }
  template <class T>
  bool operator!=(const T& rhs) const { return rhs.it_ != this->it_; }

 public:  // TODO find syntax to make this friend of const iterator
  // Advances it_ until it designates a present slot or reaches the end.
  void find_present() {
    while (it_ != c_->end() && !((*p_)[it_ - c_->begin()])) ++it_;
  }
  container* c_;
  presence* p_;
  iterator it_;
};
|
||||
|
||||
template <typename container_type>
|
||||
struct hollow_iterator : public hollow_iterator_base<
|
||||
container_type, std::vector<bool>, typename container_type::iterator> {
|
||||
typedef hollow_iterator_base<
|
||||
container_type, std::vector<bool>, typename container_type::iterator> parent_class;
|
||||
hollow_iterator(typename parent_class::container* c,
|
||||
typename parent_class::presence* p,
|
||||
typename parent_class::iterator it)
|
||||
: parent_class(c, p, it) { }
|
||||
};
|
||||
|
||||
template <typename container_type>
|
||||
struct hollow_const_iterator : public hollow_iterator_base<
|
||||
const container_type, const std::vector<bool>, typename container_type::const_iterator> {
|
||||
typedef hollow_iterator_base<
|
||||
const container_type, const std::vector<bool>, typename container_type::const_iterator> parent_class;
|
||||
typedef hollow_const_iterator<container_type> self_type;
|
||||
typedef hollow_iterator<container_type> non_const_type;
|
||||
hollow_const_iterator(non_const_type rhs) : parent_class(rhs.c_, rhs.p_, typename container_type::const_iterator(rhs.it_)) { }
|
||||
hollow_const_iterator(const typename parent_class::container* c,
|
||||
const typename parent_class::presence* p,
|
||||
typename parent_class::iterator it)
|
||||
: parent_class(c, p, it) { }
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_HOLLOW_ITERATOR_H__
|
35
cxxmph/hollow_iterator_test.cc
Normal file
35
cxxmph/hollow_iterator_test.cc
Normal file
@ -0,0 +1,35 @@
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
|
||||
#include "hollow_iterator.h"
|
||||
|
||||
using std::vector;
|
||||
using cxxmph::hollow_iterator;
|
||||
using cxxmph::hollow_const_iterator;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
vector<int> v;
|
||||
vector<bool> p;
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
v.push_back(i);
|
||||
p.push_back(i % 2 == 0);
|
||||
}
|
||||
auto begin = hollow_iterator<vector<int>>(&v, &p, v.begin());
|
||||
auto end = hollow_iterator<vector<int>>(&v, &p, v.end());
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
if (((*it) % 2) != 0) exit(-1);
|
||||
}
|
||||
hollow_const_iterator<vector<int>> const_begin(begin);
|
||||
hollow_const_iterator<vector<int>> const_end(end);
|
||||
for (auto it = const_begin; it != const_end; ++it) {
|
||||
if (((*it) % 2) != 0) exit(-1);
|
||||
}
|
||||
vector<int>::iterator vit1 = v.begin();
|
||||
vector<int>::const_iterator vit2 = v.begin();
|
||||
if (vit1 != vit2) exit(-1);
|
||||
auto it1 = hollow_iterator<vector<int>>(&v, &p, v.begin());
|
||||
auto it2 = hollow_const_iterator<vector<int>>(&v, &p, v.begin());
|
||||
if (it1 != it2) exit(-1);
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ class MPHIndex {
|
||||
~MPHIndex();
|
||||
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end);
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size);
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
// Get a unique identifier for k, in the range [0;size()). If x wasn't part
|
||||
// of the input in the last Reset call, returns a random value.
|
||||
@ -120,12 +120,13 @@ class MPHIndex {
|
||||
|
||||
// Template method needs to go in the header file.
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool MPHIndex::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
bool MPHIndex::Reset(
|
||||
ForwardIterator begin, ForwardIterator end, uint32_t size) {
|
||||
if (end == begin) {
|
||||
clear();
|
||||
return true;
|
||||
}
|
||||
m_ = end - begin;
|
||||
m_ = size;
|
||||
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
|
||||
if ((r_ % 2) == 0) r_ += 1;
|
||||
n_ = 3*r_;
|
||||
@ -204,8 +205,8 @@ template <class Key, class HashFcn = typename seeded_hash<std::hash<Key> >::hash
|
||||
class SimpleMPHIndex : public MPHIndex {
|
||||
public:
|
||||
template <class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
return MPHIndex::Reset<HashFcn>(begin, end);
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size) {
|
||||
return MPHIndex::Reset<HashFcn>(begin, end, size);
|
||||
}
|
||||
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
|
||||
uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash<HashFcn>(key); }
|
||||
|
@ -24,7 +24,7 @@ int main(int argc, char** argv) {
|
||||
keys.push_back("algume");
|
||||
|
||||
SimpleMPHIndex<string> mph_index;
|
||||
if (!mph_index.Reset(keys.begin(), keys.end())) { exit(-1); }
|
||||
if (!mph_index.Reset(keys.begin(), keys.end(), keys.size())) { exit(-1); }
|
||||
vector<int> ids;
|
||||
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
|
||||
ids.push_back(mph_index.index(keys[i]));
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "MurmurHash2.h"
|
||||
#include "mph_index.h"
|
||||
#include "hollow_iterator.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
@ -42,17 +43,8 @@ class mph_map {
|
||||
typedef typename std::vector<value_type>::size_type size_type;
|
||||
typedef typename std::vector<value_type>::difference_type difference_type;
|
||||
|
||||
template <class T, typename iterator>
|
||||
struct indirect_iterator : public typename slack_type::iterator {
|
||||
indirect_iterator(T* v, iterator it) : iterator(it), v_(v) { }
|
||||
const typename iterator::value_type::first_type& operator*() const {
|
||||
return v->begin() + (this->iterator::operator*())->second;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef indirect_iterator<std::vector<value_type>, slack_type>::iterator iterator;
|
||||
typedef indirect_iterator<std::vector<value_type>, slack_type>::const_iterator const_iterator;
|
||||
typedef hollow_iterator<std::vector<value_type>> iterator;
|
||||
typedef hollow_const_iterator<std::vector<value_type>> const_iterator;
|
||||
|
||||
// For making macros simpler.
|
||||
typedef void void_type;
|
||||
@ -90,7 +82,7 @@ class mph_map {
|
||||
template <typename iterator>
|
||||
struct iterator_first : public iterator {
|
||||
iterator_first(iterator it) : iterator(it) { }
|
||||
const typename iterator::value_type::first_type& operator*() const {
|
||||
const typename iterator::value_type::first_type& operator*() {
|
||||
return this->iterator::operator*().first;
|
||||
}
|
||||
};
|
||||
@ -100,25 +92,29 @@ class mph_map {
|
||||
return iterator_first<iterator>(it);
|
||||
}
|
||||
|
||||
template <class T, typename iterator>
|
||||
indirect_iterator<iterator> make_indirect_iterator(T* v, iterator it) {
|
||||
return indirect_iterator<iterator>(v, it);
|
||||
iterator make_iterator(typename std::vector<value_type>::iterator it) {
|
||||
return hollow_iterator<std::vector<value_type>>(&values_, &present_, it);
|
||||
}
|
||||
const_iterator make_iterator(typename std::vector<value_type>::const_iterator it) const {
|
||||
return hollow_const_iterator<std::vector<value_type>>(&values_, &present_, it);
|
||||
}
|
||||
|
||||
void pack();
|
||||
std::vector<value_type> values_;
|
||||
std::vector<bool> present_;
|
||||
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
|
||||
// TODO(davi) optimize slack to no hold a copy of the key
|
||||
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
|
||||
slack_type slack_;
|
||||
size_type size_;
|
||||
};
|
||||
|
||||
MPH_MAP_TMPL_SPEC
|
||||
bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
|
||||
return lhs.values_ == rhs.values_;
|
||||
return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
|
||||
}
|
||||
|
||||
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() {
|
||||
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
|
||||
pack();
|
||||
}
|
||||
|
||||
@ -126,13 +122,15 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
|
||||
}
|
||||
|
||||
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
||||
iterator it = find(x.first);
|
||||
if (it != end()) return make_pair(it, false);
|
||||
should_pack = false;
|
||||
auto it = find(x.first);
|
||||
auto it_end = end();
|
||||
if (it != it_end) return make_pair(it, false);
|
||||
bool should_pack = false;
|
||||
if (values_.capacity() == values_.size() && values_.size() > 256) {
|
||||
should_pack = true;
|
||||
}
|
||||
values_.push_back(x);
|
||||
present_.push_back(true);
|
||||
slack_.insert(make_pair(x.first, values_.size() - 1));
|
||||
if (should_pack) pack();
|
||||
it = find(x.first);
|
||||
@ -142,43 +140,39 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
|
||||
MPH_MAP_METHOD_DECL(void_type, pack)() {
|
||||
if (values_.empty()) return;
|
||||
bool success = index_.Reset(
|
||||
make_iterator_first(slack_.begin())),
|
||||
make_iterator_first(slack_.end())));
|
||||
make_iterator_first(begin()),
|
||||
make_iterator_first(end()), size_);
|
||||
assert(success);
|
||||
std::vector<value_type> new_values(index_.size());
|
||||
for (const_iterator it = values_.begin(), end = values_.end();
|
||||
it != end; ++it) {
|
||||
size_type id = index_.index((*it)->first);
|
||||
std::vector<bool> new_present(index_.size(), false);
|
||||
for (iterator it(begin()), it_end(end()); it != it_end; ++it) {
|
||||
size_type id = index_.index(it->first);
|
||||
assert(id < new_values.size());
|
||||
new_values[id] = *it;
|
||||
new_present[id] = true;
|
||||
}
|
||||
values_.swap(new_values);
|
||||
std::vector<size_type> new_values_pointer(
|
||||
index_.perfect_hash_size());;
|
||||
for (size_type i = 0; i < values_.size(); ++i) {
|
||||
size_type id = index_.perfect_hash(values_[i].first);
|
||||
assert(id < new_values_pointer.size());
|
||||
new_values_pointer[id] = i;
|
||||
}
|
||||
values_pointer_.swap(new_values_pointer);
|
||||
present_.swap(new_present);
|
||||
slack_type().swap(slack_);
|
||||
}
|
||||
|
||||
MPH_MAP_METHOD_DECL(iterator, begin)() { return values_.begin(); }
|
||||
MPH_MAP_METHOD_DECL(iterator, end)() { return values_.end(); }
|
||||
MPH_MAP_METHOD_DECL(const_iterator, begin)() const { return values_.begin(); }
|
||||
MPH_MAP_METHOD_DECL(const_iterator, end)() const { return values_.end(); }
|
||||
MPH_MAP_METHOD_DECL(bool_type, empty)() const { return values_.empty(); }
|
||||
MPH_MAP_METHOD_DECL(size_type, size)() const { return values_.size(); }
|
||||
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
|
||||
MPH_MAP_METHOD_DECL(iterator, end)() { return make_iterator(values_.end()); }
|
||||
MPH_MAP_METHOD_DECL(const_iterator, begin)() const { return make_iterator(values_.begin()); }
|
||||
MPH_MAP_METHOD_DECL(const_iterator, end)() const { return make_iterator(values_.end()); }
|
||||
MPH_MAP_METHOD_DECL(bool_type, empty)() const { return size_ == 0; }
|
||||
MPH_MAP_METHOD_DECL(size_type, size)() const { return size_; }
|
||||
|
||||
// Drops every stored value, presence flag and slack entry, and clears the
// perfect-hash index.
MPH_MAP_METHOD_DECL(void_type, clear)() {
  values_.clear();
  present_.clear();
  slack_.clear();
  index_.clear();
  // size() and empty() report size_ rather than values_.size(), so the
  // counter has to be reset together with the storage.
  size_ = 0;
}
|
||||
|
||||
// Erases the element designated by `pos` without moving any other element:
// the slot is flagged as absent and its contents are overwritten with a
// default-constructed value.
// NOTE(review): size_ is not adjusted here; no visible code increments it on
// insert either, so size tracking needs to be fixed in both places together.
MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
  // hollow_iterator has no operator-, so the slot index is computed from the
  // wrapped vector iterator it exposes as `it_`.
  present_[pos.it_ - values_.begin()] = false;
  *pos = value_type();
}
|
||||
MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
|
||||
iterator it = find(k);
|
||||
@ -188,22 +182,26 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
|
||||
|
||||
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
||||
if (__builtin_expect(!slack_.empty(), 0)) {
|
||||
typename slack_type::const_iterator it = slack_.find(k);
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
auto it = slack_.find(k);
|
||||
if (it != slack_.end()) return make_iterator(values_.begin() + it->second);
|
||||
}
|
||||
if (__builtin_expect(index_.size() == 0, 0)) return end();
|
||||
const_iterator it = values_.begin() + values_pointer_[index_.perfect_hash(k)];
|
||||
auto id = index_.perfect_hash(k);
|
||||
if (!present_[id]) return end();
|
||||
auto it = make_iterator(values_.begin() + id);
|
||||
if (__builtin_expect(equal_(k, it->first), 1)) return it;
|
||||
return end();
|
||||
}
|
||||
|
||||
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
||||
if (__builtin_expect(!slack_.empty(), 0)) {
|
||||
typename slack_type::const_iterator it = slack_.find(k);
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
auto it = slack_.find(k);
|
||||
if (it != slack_.end()) return make_iterator(values_.begin() + it->second);
|
||||
}
|
||||
if (__builtin_expect(index_.size() == 0, 0)) return end();
|
||||
iterator it = values_.begin() + values_pointer_[index_.perfect_hash(k)];
|
||||
auto id = index_.perfect_hash(k);
|
||||
if (!present_[id]) return end();
|
||||
auto it = make_iterator(values_.begin() + id);
|
||||
if (__builtin_expect(equal_(k, it->first), 1)) return it;
|
||||
return end();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user