Small fixes, more comments.
This commit is contained in:
parent
d3b3b3dfba
commit
d4ee76b7bf
@ -1,5 +1,5 @@
|
||||
TESTS = $(check_PROGRAMS)
|
||||
check_PROGRAMS = mph_map_test mph_index_test
|
||||
check_PROGRAMS = mph_map_test mph_index_test trigraph_test
|
||||
noinst_PROGRAMS = bm_index bm_map
|
||||
bin_PROGRAMS = cxxmph
|
||||
lib_LTLIBRARIES = libcxxmph.la
|
||||
@ -17,6 +17,9 @@ mph_index_test_SOURCES = mph_index_test.cc
|
||||
bm_index_LDADD = libcxxmph.la
|
||||
bm_index_SOURCES = bm_common.cc bm_index.cc
|
||||
|
||||
trigraph_test_LDADD = libcxxmph.la
|
||||
trigraph_test_SOURCES = trigraph_test.cc
|
||||
|
||||
bm_map_LDADD = libcxxmph.la
|
||||
bm_map_SOURCES = bm_common.cc bm_map.cc
|
||||
|
||||
|
@ -2,6 +2,25 @@
|
||||
#define __CXXMPH_MPH_INDEX_H__
|
||||
|
||||
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
||||
//
|
||||
// This is a data structure that given a set of known keys S, will create a
|
||||
// mapping from S to [0..|S|). The class is informed about S through the Reset
|
||||
// method and the mapping is queried by calling index(key).
|
||||
//
|
||||
// This is a pretty uncommon data structure, and if your application has a real
|
||||
// use case for it, chances are that it is a real win. If all you are doing is
|
||||
// a straightforward implementation of an in-memory associative mapping data
|
||||
// structure (e.g., mph_map.h), then it will probably be slower, since the
|
||||
// evaluation of index() is typically slower than the total cost of running a
|
||||
// traditional hash function over a key and doing 2-3 conflict resolutions on
|
||||
// 100byte-ish strings.
|
||||
//
|
||||
// Notes:
|
||||
//
|
||||
// Most users can use the SimpleMPHIndex wrapper instead of the MPHIndex which
|
||||
// has confusing template parameters.
|
||||
// This class only implements a minimal perfect hash function, it does not
|
||||
// implement an associative mapping data structure.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
@ -31,16 +50,20 @@ class MPHIndex {
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end);
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
// Get a unique identifier for x, in the range [0;size()). If x wasn't part
|
||||
// of the input in the last Reset call, returns a random value.
|
||||
uint32_t index(const Key& x) const;
|
||||
uint32_t size() const { return m_; }
|
||||
void clear();
|
||||
|
||||
// Advanced users functions. Please avoid unless you know what you are doing.
|
||||
uint32_t perfect_hash_size() const { return n_; }
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t perfect_hash(const Key& x) const;
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
uint32_t minimal_perfect_hash(const Key& x) const;
|
||||
// Serialization machinery for mmap usage.
|
||||
|
||||
// Serialization for mmap usage - not tested well, ping me if you care.
|
||||
// Serialized tables are not guaranteed to work across versions or different
|
||||
// endianness (although they could easily be made to be).
|
||||
uint32_t serialize_bytes_needed() const;
|
||||
@ -110,7 +133,7 @@ bool MPHIndex::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
|
||||
// cerr << "m " << m_ << " n " << n_ << " r " << r_ << endl;
|
||||
|
||||
int iterations = 10;
|
||||
int iterations = 1000;
|
||||
std::vector<TriGraph::Edge> edges;
|
||||
std::vector<uint32_t> queue;
|
||||
while (1) {
|
||||
@ -176,6 +199,8 @@ uint32_t MPHIndex::index(const Key& key) const {
|
||||
return minimal_perfect_hash<SeededHashFcn, Key>(key);
|
||||
}
|
||||
|
||||
// Simple wrapper around MPHIndex to simplify calling code. Please refer to the
|
||||
// MPHIndex class for documentation.
|
||||
template <class Key, class HashFcn = typename seeded_hash<std::tr1::hash<Key> >::hash_function>
|
||||
class SimpleMPHIndex : public MPHIndex {
|
||||
public:
|
||||
|
@ -1,3 +1,10 @@
|
||||
// Implementation of the unordered associative mapping interface using a
|
||||
// minimal perfect hash function.
|
||||
//
|
||||
// This class is about 20% to 100% slower than unordered_map (or ext/hash_map)
|
||||
// and should not be used if performance is a concern. In fact, you should only
|
||||
// use it for educational purposes.
|
||||
|
||||
#include <algorithm>
|
||||
#include <tr1/unordered_map>
|
||||
#include <vector>
|
||||
@ -58,6 +65,7 @@ class mph_map {
|
||||
const data_type& operator[](const key_type &k) const;
|
||||
|
||||
size_type bucket_count() const { return size(); }
|
||||
// FIXME: not sure if this has the semantics I want
|
||||
void rehash(size_type nbuckets /*ignored*/) { pack(); }
|
||||
|
||||
protected: // mimicking STL implementation
|
||||
@ -156,7 +164,7 @@ MPH_MAP_METHOD_DECL(const_iterator, find)(const key_type& k) const {
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
}
|
||||
if (__builtin_expect(index_.size() == 0, 0)) return end();
|
||||
auto it = values_.begin() + index_.index(k);
|
||||
const_iterator it = values_.begin() + index_.index(k);
|
||||
if (__builtin_expect(equal_(k, it->first), 1)) return it;
|
||||
return end();
|
||||
}
|
||||
@ -167,7 +175,7 @@ MPH_MAP_METHOD_DECL(iterator, find)(const key_type& k) {
|
||||
if (it != slack_.end()) return values_.begin() + it->second;
|
||||
}
|
||||
if (index_.size() == 0) return end();
|
||||
auto it = values_.begin() + index_.index(k);
|
||||
iterator it = values_.begin() + index_.index(k);
|
||||
if (equal_(it->first, k)) return it;
|
||||
return end();
|
||||
}
|
||||
|
@ -174,6 +174,8 @@ inline bool operator>=(const StringPiece& x, StringPiece& y) {
|
||||
} // namespace cxxmph
|
||||
|
||||
// allow StringPiece to be logged
|
||||
extern std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece);
|
||||
// Stream inserter for cxxmph::StringPiece, so a piece can be written directly
// to any std::ostream (e.g. in log statements).
//
// o:     destination stream.
// piece: the StringPiece to print; it is not modified.
// Returns the destination stream so that insertions can be chained.
inline std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece) {
  // The piece is converted through as_string() and the result is inserted
  // into the stream (presumably a std::string copy - confirm in stringpiece.h).
  return o << piece.as_string();
}
|
||||
|
||||
#endif // CXXMPH_STRINGPIECE_H__
|
||||
|
22
cxxmph/trigraph_test.cc
Normal file
22
cxxmph/trigraph_test.cc
Normal file
@ -0,0 +1,22 @@
|
||||
#include <cassert>
|
||||
|
||||
#include "trigraph.h"
|
||||
|
||||
using cxxmph::TriGraph;
|
||||
|
||||
// Smoke test for TriGraph: adds two edges, checks the per-vertex degrees
// reported by vertex_degree(), removes one edge, re-checks the degrees, and
// finally calls ExtractEdgesAndClear(). All checks use assert(), so they are
// compiled out if NDEBUG is defined. argc and argv are unused.
int main(int argc, char** argv) {
|
||||
// NOTE(review): presumably (number of vertices, number of edges) - confirm
// against the TriGraph constructor in trigraph.h.
TriGraph g(4, 2);
|
||||
// Two edges; vertices 1 and 2 appear in both, vertices 0 and 3 in one each.
g.AddEdge(TriGraph::Edge(0, 1, 2));
|
||||
g.AddEdge(TriGraph::Edge(1, 3, 2));
|
||||
// Expected degree of each vertex == number of added edges that list it.
assert(g.vertex_degree()[0] == 1);
|
||||
assert(g.vertex_degree()[1] == 2);
|
||||
assert(g.vertex_degree()[2] == 2);
|
||||
assert(g.vertex_degree()[3] == 1);
|
||||
// NOTE(review): the argument looks like an edge index (0 == first edge
// added); the asserts below are consistent with edge (0, 1, 2) being removed.
g.RemoveEdge(0);
|
||||
// Vertices 0, 1 and 2 each lose one; vertex 3 is unchanged.
assert(g.vertex_degree()[0] == 0);
|
||||
assert(g.vertex_degree()[1] == 1);
|
||||
assert(g.vertex_degree()[2] == 1);
|
||||
assert(g.vertex_degree()[3] == 1);
|
||||
// The extracted edges are not inspected; this only checks that the call
// completes.
std::vector<TriGraph::Edge> edges;
|
||||
g.ExtractEdgesAndClear(&edges);
|
||||
}
|
Loading…
Reference in New Issue
Block a user