From 7ead7bff2fe424091c4ed179ebc7a483a3bf173e Mon Sep 17 00:00:00 2001 From: Davi de Castro Reis Date: Thu, 28 Oct 2010 23:26:37 -0700 Subject: [PATCH] Better. --- cxxmph/cmph_hash_map.h | 56 ++++++++++++++++++++---------------- cxxmph/cmph_hash_map_test.cc | 26 ++++++++++++----- cxxmph/mphtable.cc | 3 ++ cxxmph/mphtable.h | 2 ++ 4 files changed, 55 insertions(+), 32 deletions(-) diff --git a/cxxmph/cmph_hash_map.h b/cxxmph/cmph_hash_map.h index ac061ea..12d39f1 100644 --- a/cxxmph/cmph_hash_map.h +++ b/cxxmph/cmph_hash_map.h @@ -2,6 +2,20 @@ #include #include // for std::pair +#include "MurmurHash2.h" +#include "mphtable.h" +#include "iterator_first.h" + +namespace __gnu_cxx { +template <> struct hash { + std::size_t operator()(std::string const& s) const { + return MurmurHash2(s.c_str(), s.length(), 1 /* seed */); + } +}; +} + +namespace cxxmph { + // Save on repetitive typing. #define CMPH_TMPL_SPEC template #define CMPH_CLASS_SPEC cmph_hash_map @@ -51,7 +65,7 @@ class cmph_hash_map { private: void rehash(); std::vector values_; - cmph_t* cmph_; + MPHTable table_; typedef typename __gnu_cxx::hash_map slack_type; slack_type slack_; }; @@ -61,12 +75,11 @@ bool operator==(const CMPH_CLASS_SPEC& lhs, const CMPH_CLASS_SPEC& rhs) { return lhs.values_ == rhs.values_; } -CMPH_TMPL_SPEC CMPH_CLASS_SPEC::cmph_hash_map() : cmph_(NULL) { +CMPH_TMPL_SPEC CMPH_CLASS_SPEC::cmph_hash_map() { rehash(); } CMPH_TMPL_SPEC CMPH_CLASS_SPEC::~cmph_hash_map() { - if(cmph_) cmph_destroy(cmph_); } CMPH_METHOD_DECL(insert_return_type, insert)(const value_type& x) { @@ -74,28 +87,22 @@ CMPH_METHOD_DECL(insert_return_type, insert)(const value_type& x) { if (it != end()) return std::make_pair(it, false); values_.push_back(x); slack_.insert(std::make_pair(x.first, values_.size() - 1)); - if ((slack_.size() > 10 && !cmph_) || - (cmph_ && slack_.size() > cmph_size(cmph_) * 2)) rehash(); + if ((slack_.size() > 10 && table_.size() == 0) || + (table_.size() && slack_.size() > table_.size() * 2)) { 
+ rehash(); + } it = find(x.first); - // std::cerr << "inserted " << x.first.i_ << " at " << values_.begin() - it; return std::make_pair(it, true); } CMPH_METHOD_DECL(void_type, rehash)() { if (values_.empty()) return; slack_type().swap(slack_); - cmph_io_adapter_t* source = cmph_io_struct_vector_adapter( - &(values_[0]), sizeof(value_type), 0, sizeof(key_type), values_.size()); - cmph_config_t* cmph_config = cmph_config_new(source); - cmph_config_set_algo(cmph_config, CMPH_CHD); - // cmph_config_set_verbosity(cmph_config, 1); - if (cmph_) cmph_destroy(cmph_); - cmph_ = cmph_new(cmph_config); - cmph_config_destroy(cmph_config); - cmph_io_struct_vector_adapter_destroy(source); + table_.Reset(make_iterator_first(values_.begin()), + make_iterator_first(values_.end())); std::vector new_values(values_.size()); - for (int i = 0; i < values_.size(); ++i) { - size_type id = cmph_search(cmph_, reinterpret_cast(&(values_[i].first)), sizeof(key_type)); + for (unsigned int i = 0; i < values_.size(); ++i) { + size_type id = table_.index(values_[i].first); new_values[id] = values_[i]; } values_.swap(new_values); @@ -110,8 +117,7 @@ CMPH_METHOD_DECL(bool_type, empty)() const { return values_.empty(); } CMPH_METHOD_DECL(void_type, clear)() { values_.clear(); slack_.clear(); - cmph_destroy(cmph_); - cmph_ = NULL; + table_.clear(); } CMPH_METHOD_DECL(void_type, erase)(iterator pos) { @@ -129,9 +135,8 @@ CMPH_METHOD_DECL(const_iterator, find)(const key_type& k) const { typename slack_type::const_iterator it = slack_.find(k); if (it != slack_.end()) return values_.begin() + it->second; } - if (!cmph_) return end(); - size_type id = cmph_search(cmph_, reinterpret_cast(&k), - sizeof(key_type)); + if (table_.size() == 0) return end(); + size_type id = table_.index(k); if (key_equal()(values_[id].first, k)) { return values_.begin() + id; } @@ -142,9 +147,8 @@ CMPH_METHOD_DECL(iterator, find)(const key_type& k) { typename slack_type::const_iterator it = slack_.find(k); if (it != 
slack_.end()) return values_.begin() + it->second; } - if (!cmph_) return end(); - size_type id = cmph_search(cmph_, reinterpret_cast(&k), - sizeof(key_type)); + if (table_.size() == 0) return end(); + size_type id = table_.index(k); if (key_equal()(values_[id].first, k)) { return values_.begin() + id; } @@ -155,3 +159,5 @@ CMPH_METHOD_DECL(iterator, find)(const key_type& k) { CMPH_METHOD_DECL(data_type&, operator[])(const key_type& k) { return insert(std::make_pair(k, data_type())).first->second; } + +} // namespace cxxmph diff --git a/cxxmph/cmph_hash_map_test.cc b/cxxmph/cmph_hash_map_test.cc index ad6961d..c70af58 100644 --- a/cxxmph/cmph_hash_map_test.cc +++ b/cxxmph/cmph_hash_map_test.cc @@ -1,19 +1,31 @@ #include "cmph_hash_map.h" +#include #include +#include + +using std::string; +using cxxmph::cmph_hash_map; int main(int argc, char** argv) { - cmph_hash_map h; - h.insert(std::make_pair(-1,-1)); - for (cmph_hash_map::const_iterator it = h.begin(); it != h.end(); ++it) { + cmph_hash_map h; + h.insert(std::make_pair("-1",-1)); + cmph_hash_map::const_iterator it; + for (it = h.begin(); it != h.end(); ++it) { std::cout << it->first << " -> " << it->second << std::endl; } - std::cout << "Search -1 gives " << h.find(-1)->second << std::endl; - for (int i = 0; i < 1000; ++i) h.insert(std::make_pair(i, i)); + std::cout << "Search -1 gives " << h.find("-1")->second << std::endl; + for (int i = 0; i < 1000; ++i) { + char buf[10]; + snprintf(buf, 10, "%d", i); + h.insert(std::make_pair(buf, i)); + } for (int j = 0; j < 1000; ++j) { for (int i = 1000; i > 0; --i) { - h.find(i - 1); - // std::cout << "Search " << i - 1 << " gives " << h.find(i - 1)->second << std::endl; + char buf[10]; + snprintf(buf, 10, "%d", i - 1); + h.find(buf); + // std::cout << "Search " << i - 1 << " gives " << h.find(i - 1)->second << std::endl; } } } diff --git a/cxxmph/mphtable.cc b/cxxmph/mphtable.cc index 8d461c2..0b899da 100644 --- a/cxxmph/mphtable.cc +++ b/cxxmph/mphtable.cc @@ -44,6 
+44,9 @@ cmph_uint32 get_2bit_value(const vector& d, cmph_uint8 i) { namespace cxxmph { +void MPHTable::clear() { + // TODO(davi) implement me +} bool MPHTable::GenerateQueue( TriGraph* graph, vector* queue_output) { cmph_uint32 queue_head = 0, queue_tail = 0; diff --git a/cxxmph/mphtable.h b/cxxmph/mphtable.h index d130ace..ad8cc13 100644 --- a/cxxmph/mphtable.h +++ b/cxxmph/mphtable.h @@ -30,6 +30,8 @@ class MPHTable { template bool Reset(ForwardIterator begin, ForwardIterator end); cmph_uint32 index(const key_type& x) const; + cmph_uint32 size() const { return m_; } + void clear(); private: template