Compiles, still need to fix size tracking.

This commit is contained in:
Davi Reis 2012-03-11 23:21:18 -03:00
parent c057fb882b
commit 238e384367
9 changed files with 167 additions and 60 deletions

View File

@ -1,12 +1,12 @@
TESTS = $(check_PROGRAMS)
check_PROGRAMS = mph_map_test mph_index_test trigraph_test
check_PROGRAMS = hollow_iterator_test mph_map_test mph_index_test trigraph_test
noinst_PROGRAMS = bm_index bm_map
bin_PROGRAMS = cxxmph
lib_LTLIBRARIES = libcxxmph.la
# Sources of the libcxxmph libtool library. The header name must match the
# trigraph.h listed in cxxmph_include_HEADERS (it was misspelled "trigragh.h").
libcxxmph_la_SOURCES = MurmurHash2.h trigraph.h trigraph.cc mph_index.h mph_index.cc seeded_hash.h stringpiece.h benchmark.h benchmark.cc
libcxxmph_la_LDFLAGS = -version-info 0:0:0
cxxmph_includedir = $(includedir)/cxxmph/
cxxmph_include_HEADERS = mph_map.h mph_index.h MurmurHash2.h trigraph.h seeded_hash.h stringpiece.h
cxxmph_include_HEADERS = mph_map.h mph_index.h MurmurHash2.h trigraph.h seeded_hash.h stringpiece.h hollow_iterator.h
mph_map_test_LDADD = libcxxmph.la
mph_map_test_SOURCES = mph_map_test.cc
@ -25,3 +25,6 @@ bm_map_SOURCES = bm_common.cc bm_map.cc
cxxmph_LDADD = libcxxmph.la
cxxmph_SOURCES = cxxmph.cc
hollow_iterator_test_SOURCES = hollow_iterator_test.cc

View File

@ -21,7 +21,7 @@ class BM_MPHIndexCreate : public UrlsBenchmark {
protected:
virtual void Run() {
SimpleMPHIndex<StringPiece> index;
index.Reset(urls_.begin(), urls_.end());
index.Reset(urls_.begin(), urls_.end(), urls_.size());
}
};
@ -53,7 +53,7 @@ class BM_MPHIndexSearch : public SearchUrlsBenchmark {
protected:
virtual bool SetUp () {
if (!SearchUrlsBenchmark::SetUp()) return false;
index_.Reset(urls_.begin(), urls_.end());
index_.Reset(urls_.begin(), urls_.end(), urls_.size());
return true;
}
SimpleMPHIndex<StringPiece> index_;

View File

@ -13,7 +13,8 @@ namespace cxxmph {
template<class MapType, class T>
const T* myfind(const MapType& mymap, const T& k) {
auto it = mymap.find(k);
if (it == mymap.end()) return NULL;
auto end = mymap.end();
if (it == end) return NULL;
return &it->second;
}

View File

@ -63,8 +63,8 @@ int main(int argc, char** argv) {
for (int i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i];
mph_map<string, string>::const_iterator it = table.begin();
mph_map<string, string>::const_iterator end = table.end();
for (; it != end; ++it) {
cout << (it - table.begin()) << ": " << it->first
for (int i = 0; it != end; ++it, ++i) {
cout << i << ": " << it->first
<<" -> " << it->second << endl;
}
}

69
cxxmph/hollow_iterator.h Normal file
View File

@ -0,0 +1,69 @@
#ifndef __CXXMPH_HOLLOW_ITERATOR_H__
#define __CXXMPH_HOLLOW_ITERATOR_H__
#include <iterator>
#include <vector>
namespace cxxmph {
// Forward iterator over a container in which only some slots are "present".
//
// The iterator wraps an ordinary iterator `it_` into `*c_` and a parallel
// presence container `*p_`, indexed by the slot's offset from `c_->begin()`.
// Construction and every increment advance `it_` to the next slot whose
// presence entry is true, or to `c_->end()` when there is none.
//
// Template parameters:
//   container_type  underlying container (may be const-qualified).
//   presence_type   container of flags, indexable with operator[].
//   iterator_type   iterator into container_type; must support subtraction,
//                   because the presence index is computed as
//                   `it_ - c_->begin()`.
template <typename container_type, typename presence_type, typename iterator_type>
struct hollow_iterator_base
    : public std::iterator<std::forward_iterator_tag,
                           typename container_type::value_type> {
  typedef presence_type presence;
  typedef container_type container;
  typedef iterator_type iterator;
  typedef hollow_iterator_base<container, presence, iterator>& self_reference;
  typedef typename iterator::reference reference;
  typedef typename iterator::pointer pointer;

  // Wraps `it` and immediately skips forward to the first present slot at or
  // after it. `c` and `p` are dereferenced, so they must be non-null.
  hollow_iterator_base(container* c, presence* p, iterator it)
      : c_(c), p_(p), it_(it) { find_present(); }

  // Pre-increment: moves to the next present slot (or end) and returns *this.
  // The original body had no return statement, which is undefined behaviour
  // for a function with a non-void return type.
  self_reference operator++() {
    ++it_;
    find_present();
    return *this;
  }

  // Dereferencing does not modify the iterator, so these are const.
  reference operator*() const { return *it_; }
  pointer operator->() const { return &(*it_); }

  // Comparison only looks at the wrapped position, which lets const and
  // non-const flavours be compared with each other.
  // TODO find syntax to make this less permissible at compile time
  template <class T>
  bool operator==(const T& rhs) const { return rhs.it_ == this->it_; }
  template <class T>
  bool operator!=(const T& rhs) const { return rhs.it_ != this->it_; }

 public:  // TODO find syntax to make this friend of const iterator
  // Advances it_ until it reaches c_->end() or a slot flagged as present.
  void find_present() {
    while (it_ != c_->end() && !((*p_)[it_ - c_->begin()])) ++it_;
  }

  container* c_;
  presence* p_;
  iterator it_;
};
// Mutable hollow iterator: walks `container_type` through its non-const
// iterator, using a std::vector<bool> as the presence container.
template <typename container_type>
struct hollow_iterator
    : public hollow_iterator_base<container_type,
                                  std::vector<bool>,
                                  typename container_type::iterator> {
  typedef hollow_iterator_base<container_type,
                               std::vector<bool>,
                               typename container_type::iterator> parent_class;

  // Forwards all three arguments unchanged to the base-class constructor.
  hollow_iterator(typename parent_class::container* values,
                  typename parent_class::presence* flags,
                  typename parent_class::iterator position)
      : parent_class(values, flags, position) {}
};
// Read-only hollow iterator: walks a const `container_type` through its
// const_iterator, using a const std::vector<bool> as the presence container.
template <typename container_type>
struct hollow_const_iterator
    : public hollow_iterator_base<const container_type,
                                  const std::vector<bool>,
                                  typename container_type::const_iterator> {
  typedef hollow_iterator_base<const container_type,
                               const std::vector<bool>,
                               typename container_type::const_iterator> parent_class;
  typedef hollow_const_iterator<container_type> self_type;
  typedef hollow_iterator<container_type> non_const_type;

  // Implicit conversion from the mutable flavour: reuses its container and
  // presence pointers and converts its position to a const_iterator.
  hollow_const_iterator(non_const_type other)
      : parent_class(other.c_,
                     other.p_,
                     typename container_type::const_iterator(other.it_)) {}

  // Forwards all three arguments unchanged to the base-class constructor.
  hollow_const_iterator(const typename parent_class::container* values,
                        const typename parent_class::presence* flags,
                        typename parent_class::iterator position)
      : parent_class(values, flags, position) {}
};
} // namespace cxxmph
#endif // __CXXMPH_HOLLOW_ITERATOR_H__

View File

@ -0,0 +1,35 @@
#include <cstdlib>
#include <cstdio>
#include <vector>
#include "hollow_iterator.h"
using std::vector;
using cxxmph::hollow_iterator;
using cxxmph::hollow_const_iterator;
// Exercises hollow_iterator and hollow_const_iterator over a vector holding
// 0..99 in which only the even positions are flagged as present.
// Exits with -1 on the first failed check.
int main(int argc, char** argv) {
  vector<int> v;
  vector<bool> p;
  for (int i = 0; i < 100; ++i) {
    v.push_back(i);
    p.push_back(i % 2 == 0);
  }

  auto begin = hollow_iterator<vector<int>>(&v, &p, v.begin());
  auto end = hollow_iterator<vector<int>>(&v, &p, v.end());
  // Check the exact sequence 0, 2, ..., 98 rather than only the parity, so
  // the test cannot pass vacuously when the iterator yields nothing or skips
  // present elements.
  int expected = 0;
  for (auto it = begin; it != end; ++it) {
    if (*it != expected) exit(-1);
    expected += 2;
  }
  if (expected != 100) exit(-1);

  // Same walk through the const flavour, built by converting the mutable one.
  hollow_const_iterator<vector<int>> const_begin(begin);
  hollow_const_iterator<vector<int>> const_end(end);
  expected = 0;
  for (auto it = const_begin; it != const_end; ++it) {
    if (*it != expected) exit(-1);
    expected += 2;
  }
  if (expected != 100) exit(-1);

  // Baseline: plain vector iterators compare equal across constness.
  vector<int>::iterator vit1 = v.begin();
  vector<int>::const_iterator vit2 = v.begin();
  if (vit1 != vit2) exit(-1);

  // The hollow iterators must offer the same mixed-constness comparison.
  auto it1 = hollow_iterator<vector<int>>(&v, &p, v.begin());
  auto it2 = hollow_const_iterator<vector<int>>(&v, &p, v.begin());
  if (it1 != it2) exit(-1);
}

View File

@ -48,7 +48,7 @@ class MPHIndex {
~MPHIndex();
template <class SeededHashFcn, class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end);
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size);
template <class SeededHashFcn, class Key> // must agree with Reset
// Get a unique identifier for k, in the range [0;size()). If x wasn't part
// of the input in the last Reset call, returns a random value.
@ -120,12 +120,13 @@ class MPHIndex {
// Template method needs to go in the header file.
template <class SeededHashFcn, class ForwardIterator>
bool MPHIndex::Reset(ForwardIterator begin, ForwardIterator end) {
bool MPHIndex::Reset(
ForwardIterator begin, ForwardIterator end, uint32_t size) {
if (end == begin) {
clear();
return true;
}
m_ = end - begin;
m_ = size;
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
if ((r_ % 2) == 0) r_ += 1;
n_ = 3*r_;
@ -204,8 +205,8 @@ template <class Key, class HashFcn = typename seeded_hash<std::hash<Key> >::hash
class SimpleMPHIndex : public MPHIndex {
public:
template <class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end) {
return MPHIndex::Reset<HashFcn>(begin, end);
bool Reset(ForwardIterator begin, ForwardIterator end, uint32_t size) {
return MPHIndex::Reset<HashFcn>(begin, end, size);
}
uint32_t index(const Key& key) const { return MPHIndex::index<HashFcn>(key); }
uint32_t perfect_hash(const Key& key) const { return MPHIndex::perfect_hash<HashFcn>(key); }

View File

@ -24,7 +24,7 @@ int main(int argc, char** argv) {
keys.push_back("algume");
SimpleMPHIndex<string> mph_index;
if (!mph_index.Reset(keys.begin(), keys.end())) { exit(-1); }
if (!mph_index.Reset(keys.begin(), keys.end(), keys.size())) { exit(-1); }
vector<int> ids;
for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
ids.push_back(mph_index.index(keys[i]));

View File

@ -14,6 +14,7 @@
#include "MurmurHash2.h"
#include "mph_index.h"
#include "hollow_iterator.h"
namespace cxxmph {
@ -42,17 +43,8 @@ class mph_map {
typedef typename std::vector<value_type>::size_type size_type;
typedef typename std::vector<value_type>::difference_type difference_type;
template <class T, typename iterator>
struct indirect_iterator : public typename slack_type::iterator {
indirect_iterator(T* v, iterator it) : iterator(it), v_(v) { }
const typename iterator::value_type::first_type& operator*() const {
return v->begin() + (this->iterator::operator*())->second;
}
};
typedef indirect_iterator<std::vector<value_type>, slack_type>::iterator iterator;
typedef indirect_iterator<std::vector<value_type>, slack_type>::const_iterator const_iterator;
typedef hollow_iterator<std::vector<value_type>> iterator;
typedef hollow_const_iterator<std::vector<value_type>> const_iterator;
// For making macros simpler.
typedef void void_type;
@ -90,7 +82,7 @@ class mph_map {
template <typename iterator>
struct iterator_first : public iterator {
iterator_first(iterator it) : iterator(it) { }
const typename iterator::value_type::first_type& operator*() const {
const typename iterator::value_type::first_type& operator*() {
return this->iterator::operator*().first;
}
};
@ -100,25 +92,29 @@ class mph_map {
return iterator_first<iterator>(it);
}
template <class T, typename iterator>
indirect_iterator<iterator> make_indirect_iterator(T* v, iterator it) {
return indirect_iterator<iterator>(v, it);
iterator make_iterator(typename std::vector<value_type>::iterator it) {
return hollow_iterator<std::vector<value_type>>(&values_, &present_, it);
}
const_iterator make_iterator(typename std::vector<value_type>::const_iterator it) const {
return hollow_const_iterator<std::vector<value_type>>(&values_, &present_, it);
}
void pack();
std::vector<value_type> values_;
std::vector<bool> present_;
SimpleMPHIndex<Key, typename seeded_hash<HashFcn>::hash_function> index_;
// TODO(davi) optimize slack to not hold a copy of the key
typedef unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
slack_type slack_;
size_type size_;
};
MPH_MAP_TMPL_SPEC
bool operator==(const MPH_MAP_CLASS_SPEC& lhs, const MPH_MAP_CLASS_SPEC& rhs) {
return lhs.values_ == rhs.values_;
return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
}
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() {
MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::mph_map() : size_(0) {
pack();
}
@ -126,13 +122,15 @@ MPH_MAP_TMPL_SPEC MPH_MAP_CLASS_SPEC::~mph_map() {
}
MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
iterator it = find(x.first);
if (it != end()) return make_pair(it, false);
should_pack = false;
auto it = find(x.first);
auto it_end = end();
if (it != it_end) return make_pair(it, false);
bool should_pack = false;
if (values_.capacity() == values_.size() && values_.size() > 256) {
should_pack = true;
}
values_.push_back(x);
present_.push_back(true);
slack_.insert(make_pair(x.first, values_.size() - 1));
if (should_pack) pack();
it = find(x.first);
@ -142,43 +140,39 @@ MPH_MAP_METHOD_DECL(insert_return_type, insert)(const value_type& x) {
MPH_MAP_METHOD_DECL(void_type, pack)() {
if (values_.empty()) return;
bool success = index_.Reset(
make_iterator_first(slack_.begin())),
make_iterator_first(slack_.end())));
make_iterator_first(begin()),
make_iterator_first(end()), size_);
assert(success);
std::vector<value_type> new_values(index_.size());
for (const_iterator it = values_.begin(), end = values_.end();
it != end; ++it) {
size_type id = index_.index((*it)->first);
std::vector<bool> new_present(index_.size(), false);
for (iterator it(begin()), it_end(end()); it != it_end; ++it) {
size_type id = index_.index(it->first);
assert(id < new_values.size());
new_values[id] = *it;
new_present[id] = true;
}
values_.swap(new_values);
std::vector<size_type> new_values_pointer(
index_.perfect_hash_size());;
for (size_type i = 0; i < values_.size(); ++i) {
size_type id = index_.perfect_hash(values_[i].first);
assert(id < new_values_pointer.size());
new_values_pointer[id] = i;
}
values_pointer_.swap(new_values_pointer);
present_.swap(new_present);
slack_type().swap(slack_);
}
MPH_MAP_METHOD_DECL(iterator, begin)() { return values_.begin(); }
MPH_MAP_METHOD_DECL(iterator, end)() { return values_.end(); }
MPH_MAP_METHOD_DECL(const_iterator, begin)() const { return values_.begin(); }
MPH_MAP_METHOD_DECL(const_iterator, end)() const { return values_.end(); }
MPH_MAP_METHOD_DECL(bool_type, empty)() const { return values_.empty(); }
MPH_MAP_METHOD_DECL(size_type, size)() const { return values_.size(); }
MPH_MAP_METHOD_DECL(iterator, begin)() { return make_iterator(values_.begin()); }
MPH_MAP_METHOD_DECL(iterator, end)() { return make_iterator(values_.end()); }
MPH_MAP_METHOD_DECL(const_iterator, begin)() const { return make_iterator(values_.begin()); }
MPH_MAP_METHOD_DECL(const_iterator, end)() const { return make_iterator(values_.end()); }
MPH_MAP_METHOD_DECL(bool_type, empty)() const { return size_ == 0; }
MPH_MAP_METHOD_DECL(size_type, size)() const { return size_; }
MPH_MAP_METHOD_DECL(void_type, clear)() {
values_.clear();
present_.clear();
slack_.clear();
index_.clear();
}
MPH_MAP_METHOD_DECL(void_type, erase)(iterator pos) {
values_.erase(pos);
pack();
present_[pos - begin] = false;
*pos = value_type();
}
MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
iterator it = find(k);
@ -188,22 +182,26 @@ MPH_MAP_METHOD_DECL(void_type, erase)(const key_type& k) {
MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
if (__builtin_expect(!slack_.empty(), 0)) {
typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second;
auto it = slack_.find(k);
if (it != slack_.end()) return make_iterator(values_.begin() + it->second);
}
if (__builtin_expect(index_.size() == 0, 0)) return end();
const_iterator it = values_.begin() + values_pointer_[index_.perfect_hash(k)];
auto id = index_.perfect_hash(k);
if (!present_[id]) return end();
auto it = make_iterator(values_.begin() + id);
if (__builtin_expect(equal_(k, it->first), 1)) return it;
return end();
}
MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
if (__builtin_expect(!slack_.empty(), 0)) {
typename slack_type::const_iterator it = slack_.find(k);
if (it != slack_.end()) return values_.begin() + it->second;
auto it = slack_.find(k);
if (it != slack_.end()) return make_iterator(values_.begin() + it->second);
}
if (__builtin_expect(index_.size() == 0, 0)) return end();
iterator it = values_.begin() + values_pointer_[index_.perfect_hash(k)];
auto id = index_.perfect_hash(k);
if (!present_[id]) return end();
auto it = make_iterator(values_.begin() + id);
if (__builtin_expect(equal_(k, it->first), 1)) return it;
return end();
}