From 724e716d673087757f950ee5b98b33a44d426e03 Mon Sep 17 00:00:00 2001 From: Davi de Castro Reis Date: Sun, 24 Oct 2010 19:12:47 -0700 Subject: [PATCH] Added murmur hash and finished porting all c code. --- cxxmph/Makefile.am | 7 +- cxxmph/MurmurHash2.h | 64 ++++++++++ cxxmph/cmph_hash_map.h | 2 - cxxmph/mphtable.cc | 234 +++++++++++++++++++++++++--------- cxxmph/mphtable.h | 63 +++++---- cxxmph/mphtable_test.cc | 22 ++++ cxxmph/randomly_seeded_hash.h | 24 ++++ cxxmph/stringpiece.h | 177 +++++++++++++++++++++++++ cxxmph/trigraph.cc | 41 +++++- cxxmph/trigraph.h | 39 ++++-- 10 files changed, 569 insertions(+), 104 deletions(-) create mode 100644 cxxmph/MurmurHash2.h create mode 100644 cxxmph/mphtable_test.cc create mode 100644 cxxmph/randomly_seeded_hash.h create mode 100644 cxxmph/stringpiece.h diff --git a/cxxmph/Makefile.am b/cxxmph/Makefile.am index e29b81e..da7fa84 100644 --- a/cxxmph/Makefile.am +++ b/cxxmph/Makefile.am @@ -1,8 +1,11 @@ -bin_PROGRAMS = cmph_hash_map_test +bin_PROGRAMS = cmph_hash_map_test mphtable_test lib_LTLIBRARIES = libcxxmph.la -libcxxmph_la_SOURCES = trigragh.h trigraph.cc +libcxxmph_la_SOURCES = stringpiece.h MurmurHash2.h randomly_seeded_hash.h trigragh.h trigraph.cc mphtable.h mphtable.cc libcxxmph_la_LDFLAGS = -version-info 0:0:0 cmph_hash_map_test_LDADD = libcxxmph.la cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc + +mphtable_test_LDADD = libcxxmph.la +mphtable_test_SOURCES = mphtable_test.cc diff --git a/cxxmph/MurmurHash2.h b/cxxmph/MurmurHash2.h new file mode 100644 index 0000000..81051fe --- /dev/null +++ b/cxxmph/MurmurHash2.h @@ -0,0 +1,64 @@ +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + unsigned int k = *(unsigned int *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} diff --git a/cxxmph/cmph_hash_map.h b/cxxmph/cmph_hash_map.h index 3923dc8..ac061ea 100644 --- a/cxxmph/cmph_hash_map.h +++ b/cxxmph/cmph_hash_map.h @@ -2,8 +2,6 @@ #include #include // for std::pair -#include - // Save on repetitive typing. #define CMPH_TMPL_SPEC template #define CMPH_CLASS_SPEC cmph_hash_map diff --git a/cxxmph/mphtable.cc b/cxxmph/mphtable.cc index 7b79d0d..2c5ba32 100644 --- a/cxxmph/mphtable.cc +++ b/cxxmph/mphtable.cc @@ -1,105 +1,213 @@ -#include +#include #include "mphtable.h" using std::vector; +namespace cxxmph { + template template - -void MPHTable::Reset(ForwardIterator begin, ForwardIterator end) { - TableBuilderState st; - st.c = 1.23; - st.b = 7; - st.m = end - begin; - st.r = static_cast(ceil((st.c*st.m)/3)); - if ((st.r % 2) == 0) st.r += 1; - st.n = 3*st.r; - st.k = 1U << st.b; - st.ranktablesize = static_cast( - ceil(st.n / static_cast(st.k))); - st.graph_builder = TriGraph(st.m, st.n); // giant copy - st.edges_queue.resize(st.m) +bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) { + TableBuilderState st; + m_ = end - begin; + r_ = static_cast(ceil((c_*m_)/3)); + if (r_ % 2) == 0) r_ += 1; + n_ = 3*r_; + k_ = 1U << b_; int iterations = 1000; while (1) { - hasher hasher0 = HashFcn(); - ok = Mapping(st.graph_builder, st.edges_queue); - if (ok) break; + for (int i = 0; i < 3; ++i) hash_function_[i] = hasher(); + vector edges; + vector queue; + if (Mapping(begin, end, &edges, &queue)) break; else --iterations; if (iterations == 0) break; } if (iterations == 0) return false; - vector graph; - st.graph_builder.ExtractEdgesAndClear(&graph); - Assigning(graph, st.edges_queue); - vector().swap(st.edges_queue); - Ranking(graph); + vector& edges; + graph->ExtractEdgesAndClear(&edges); + Assigning(queue, edges); + vector().swap(edges); + Ranking(); } template -int MPHTable::GenerateQueue( - cmph_uint32 nedges, cmph_uint32 nvertices, - TriGraph* graph, Queue* queue) { +bool MPHTable::GenerateQueue( + TriGraph* graph, vector* queue_output) { cmph_uint32 queue_head = 0, queue_tail = 0; + cmph_uint32 nedges = n_; + cmph_uint32 nvertices = m_; // Relies on vector using 1 bit per element vector marked_edge((nedges >> 3) + 1, false); - queue->swap(Queue(nvertices, 0)); + Queue queue(nvertices, 0); for (int i = 0; i < nedges; ++i) { - TriGraph::Edge e = graph.edges[i].vertices; - if (graph.vertex_degree_[e.vertices[0]] == 1 || - graph.vertex_degree_[e.vertices[1]] == 1 || - graph.vertex_degree[e.vertices[2]] == 1) { + const TriGraph::Edge& e = graph->edges()[i]; + if (graph->vertex_degree()[e[0]] == 1 || + graph->vertex_degree()[e[1]] == 1 || + graph->vertex_degree()[e[2]] == 1) { if (!marked_edge[i]) { - (*queue)[queue_head++] = i; + queue[queue_head++] = i; marked_edge[i] = true; } } } while (queue_tail != queue_head) { - cmph_uint32 current_edge = (*queue)[queue_tail++]; + cmph_uint32 current_edge = queue[queue_tail++]; graph->RemoveEdge(current_edge); - TriGraph::Edge e = graph->edges[current_edge]; + const TriGraph::Edge& e = graph->edges()[current_edge]; for (int i = 0; i < 3; ++i) { - cmph_uint32 v = e.vertices[i]; - if (graph->vertex_degree[v] == 1) { - cmph_uint32 first_edge = graph->first_edge_[v]; - if (!marked_edge[first_edge) { + cmph_uint32 v = e[i]; + if (graph->vertex_degree()[v] == 1) { + cmph_uint32 first_edge = graph->first_edge()[v]; + if (!marked_edge[first_edge]) { queue[queue_head++] = first_edge; marked_edge[first_edge] = true; } } } } - vector().swap(marked_edge); - return queue_head - nedges; -} - -template -int MPHTable::Mapping(TriGraph* graph, Queue* queue) { - int cycles = 0; - graph->Reset(m, n); - for (ForwardIterator it = begin_; it != end_; ++it) { - cmph_uint32 hash_values[3]; - for (int i = 0; i < 3; ++i) { - hash_values[i] = hasher_(*it); - } - cmph_uint32 v0 = hash_values[0] % bdz->r; - cmph_uint32 v1 = hash_values[1] % bdz->r + bdz->r; - cmph_uint32 v2 = hash_values[2] % bdz->r + (bdz->r << 1); - graph->AddEdge(Edge(v0, v1, v2)); - } - cycles = GenerateQueue(bdz->m, bdz->n, queue, graph); + int cycles = queue_head - nedges; + if (cycles == 0) queue.swap(*queue_output); return cycles == 0; } -void MPHTable::Assigning(TriGraph* graph, Queue* queue) { -} -void MPHTable::Ranking(TriGraph* graph, Queue* queue) { -} -cmph_uint32 MPHTable::Search(const key_type& key) { +template +template +bool MPHTable::Mapping( + ForwardIterator begin, ForwardIterator end, + vector* edges, vector queue) { + int cycles = 0; + TriGraph graph(m, n); + for (ForwardIterator it = begin; it != end; ++it) { + cmph_uint32 h[3]; + for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it); + cmph_uint32 v0 = h[0] % r_; + cmph_uint32 v1 = h[1] % r_ + r_; + cmph_uint32 v2 = h[2] % r_ + (r_ << 1); + graph.AddEdge(Edge(v0, v1, v2)); + } + if (GenerateQueue(&graph, queue)) { + graph.ExtractEdgesAndClear(edges); + return true; + } + return false; } -cmph_uint32 MPHTable::Rank(const key_type& key) { +template +void MPHTable::Assigning( + const vector& edges, const vector& queue) { + cmph_uint32 nedges = n_; + cmph_uint32 current_edge = 0; + vector marked_vertices(nedges + 1); + // TODO(davi) use half nibbles instead + // vector g(static_cast(ceil(nedges / 4.0)), + // std::numerical_limits::max()); + static const cmph_uint8 kUnassigned = 3; + vector(nedges, kUnassigned).swap(g_); + for (int i = nedges - 1; i + 1 >= 1; --i) { + current_edge = queue[i]; + const TriGraph::Edge& e = edges[current_edge]; + if (!marked_vertices[e[0]]) { + if (!marked_vertices[e[1]]) { + g_[e[1]] = kUnassigned; + marked_vertices[e[1]] = true; + } + if (!marked_vertices[e[2]]) { + g_[e[2]] = kUnassigned; + marked_vertices[e[2]] = true; + } + g_[e[0]] = (6 - g_[e[1]] + g_[e2]) % 3; + marked_vertices[e[0]] = true; + } else if (!marked_vertices[e[1]])) { + if (!marked_vertices[e[2]])) { + g_[e[2]] = kUnassigned; + marked_vertices[e[2]] = true; + } + g_[e[1]] = 7 - (g_[e[0]] + g_[e[2]]) % 3; + marked_vertices[e[1]] = true; + } else { + g_[e[2]] = (8 - g_[e[0]] + g_[e[1]]) % 3; + marked_vertices[e[2]] = true; + } + } } + +// table used for looking up the number of assigned vertices to a 8-bit integer +static cmph_uint8 kBdzLookupTable[] = +{ +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1, +2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0 +}; + +template +void MPHTable::Ranking() { + cmph_uint32 nbytes_total = static_cast(ceil(st->n / 4.0)); + cmph_uint32 size = k_ >> 2U; + ranktablesize = static_cast(ceil(n_ / static_cast(k_))); + // TODO(davi) Change swap of member classes for resize + memset to avoid fragmentation + vector (ranktablesize).swap(ranktable_);; + cmph_uint32 offset = 0; + cmph_uint32 count = 0; + cmph_uint32 i = 0; + while (1) { + if (i == ranktable.size()) break; + cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total; + for (j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]]; + ranktable_[i] = count; + offset += nbytes; + nbytes_total -= size; + ++i; + } +} + +template +cmph_uint32 MPHTable::Search(const key_type& key) const { + cmph_uint32 vertex; + cmph_uint32 h[3]; + for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key); + h[0] = h[0] % st->r; + h[1] = h[1] % st->r + st->r; + h[2] = h[2] % st->r + (st->r << 1); + cmph_uint32 vertex = h[(h[g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3]; + return Rank(st->b, st->ranktable, vertex); +} + +template +cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const { + cmph_uint32 index = vertex >> b_; + cmph_uint32 base_rank = ranktable_[index]; + cmph_uint32 beg_idx_v = index << b; + cmph_uint32 beg_idx_b = index >> 2 + cmph_uint32 end_idx_b = index >> 2 + while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]]; + beg_idx_v = beg_idx_b << 2; + while (beg_idx_v < vertex) { + if (g_[beg_idx_v) != kUnassigned) ++base_rank; + ++beg_idx_v; + } + return base_rank; +} + +template +cmph_uint32 MPHTable::index(const key_type& key) const { + return Search(key); +} + +} // namespace cxxmph diff --git a/cxxmph/mphtable.h b/cxxmph/mphtable.h index 309ce7f..eccff61 100644 --- a/cxxmph/mphtable.h +++ b/cxxmph/mphtable.h @@ -1,15 +1,22 @@ +#ifndef __CXXMPH_MPHTABLE_H__ +#define __CXXMPH_MPHTABLE_H__ + // Minimal perfect hash abstraction implementing the BDZ algorithm #include +#include "randomly_seeded_hash.h" +#include "stringpiece.h" #include "trigraph.h" -template > +namespace cxxmph { + +template class MPHTable { public: typedef Key key_type; typedef NewRandomlySeededHashFcn hasher; - MPHTable(); + MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { } ~MPHTable(); template @@ -17,28 +24,38 @@ class MPHTable { cmph_uint32 index(const key_type& x) const; private: - typedef std::vector Queue; - template - struct TableBuilderState { - ForwardIterator begin; - ForwardIterator end; - Queue edges_queue; - TriGraph graph_builder; - double c; - cmph_uint32 m; - cmph_uint32 n; - cmph_uint32 k; - cmph_uint32 ranktablesize; - }; - int GenerateQueue( - cmph_uint32 nedges, cmph_uint32 nvertices, - TriGraph* graph, Queue* queue); - void Assigning(TriGraph* graph, Queue* queue); - void Ranking(TriGraph* graph, Queue* queue); - cmph_uint32 Search(const StringPiece& key); - cmph_uint32 Rank(const StringPiece& key); + template + bool Mapping(ForwardIterator begin, ForwardIterator end, + vector* edges, vector queue); + bool GenerateQueue(TriGraph* graph, vector* queue); + void Assigning(TriGraph* graph_builder, Queue* queue); + void Ranking(TriGraph* graph_builder, Queue* queue); + cmph_uint32 Search(const StringPiece& key); + cmph_uint32 Rank(const StringPiece& key); - std::vector graph_; + // Algorithm parameters + cmph_uint8 b_; // Number of bits of the kth index in the ranktable + double c_; // Number of bits per key (? is it right) + + // Values used during generation + cmph_uint32 m_; // edges count + cmph_uint32 n_; // vertex count + cmph_uint32 k_ // kth index in ranktable, $k = log_2(n=3r)\varepsilon$ + + // Values used during search + + // Partition vertex count, derived from c parameter. + cmph_uint32 r_; + // The array containing the minimal perfect hash function graph. + std::vector g_; + // The table used for the rank step of the minimal perfect hash function + std::vector ranktable_; + // The selected hash function triplet for finding the edges in the minimal + // perfect hash function graph. + hasher hash_function_[3]; + }; +} // namespace cxxmph +#define // __CXXMPH_MPHTABLE_H__ diff --git a/cxxmph/mphtable_test.cc b/cxxmph/mphtable_test.cc new file mode 100644 index 0000000..e18b34d --- /dev/null +++ b/cxxmph/mphtable_test.cc @@ -0,0 +1,22 @@ +#include +#include + +#include "mphtable.h" + +using std::vector; +using cxxmph::MPHTable; + +int main(int argc, char** argv) { + vector keys; + keys.push_back(10); + keys.push_back(4); + keys.push_back(3); + + MPHTable mphtable; + assert(mphtable.Reset(keys.begin(), keys.end())); + vector ids; + for (int i = 0; i < keys.size(); ++i) ids.push_back(mphtable.index(keys[i])); + sort(ids.begin(), ids.end()); + for (int i = 0; i < ids.size(); ++i) assert(ids[i] == i); +} + diff --git a/cxxmph/randomly_seeded_hash.h b/cxxmph/randomly_seeded_hash.h new file mode 100644 index 0000000..69db56a --- /dev/null +++ b/cxxmph/randomly_seeded_hash.h @@ -0,0 +1,24 @@ +#ifndef __CXXMPH_RANDOMLY_SEEDED_HASH__ +#define __CXXMPH_RANDOMLY_SEEDED_HASH__ + +// Helper to create randomly seeded hash functions out of existing hash +// functions that take a seed as a parameter. + +#include + +#include "../src/cmph_types.h" +#include "MurmurHash2.h" + +namespace cxxmph { + +struct RandomlySeededMurmur2 { + RandomlySeededHashFunction() : seed(random()) { } + cmph_uint32 operator()(const StringPiece& key) { + return MurmurHash2(key.data(), key.length(), seed); + } + cmph_uint32 seed; +}; + +} // namespace cxxmph + +#endif // __CXXMPH_RANDOMLY_SEEDED_HASH__ diff --git a/cxxmph/stringpiece.h b/cxxmph/stringpiece.h new file mode 100644 index 0000000..fdd8f75 --- /dev/null +++ b/cxxmph/stringpiece.h @@ -0,0 +1,177 @@ +// Copyright 2001-2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A string-like object that points to a sized piece of memory. +// +// Functions or methods may use const StringPiece& parameters to accept either +// a "const char*" or a "string" value that will be implicitly converted to +// a StringPiece. The implicit conversion means that it is often appropriate +// to include this .h file in other files rather than forward-declaring +// StringPiece as would be appropriate for most other Google classes. +// +// Systematic usage of StringPiece is encouraged as it will reduce unnecessary +// conversions from "const char*" to "string" and back again. +// +// +// Arghh! I wish C++ literals were "string". + +#ifndef CXXMPH_STRINGPIECE_H__ +#define CXXMPH_STRINGPIECE_H__ + +#include +#include +#include + +namespace cxxmph { + +class StringPiece { + private: + const char* ptr_; + int length_; + + public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected. + StringPiece() : ptr_(NULL), length_(0) { } + StringPiece(const char* str) + : ptr_(str), length_((str == NULL) ? 0 : static_cast(strlen(str))) { } + StringPiece(const std::string& str) + : ptr_(str.data()), length_(static_cast(str.size())) { } + StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { } + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. + const char* data() const { return ptr_; } + int size() const { return length_; } + int length() const { return length_; } + bool empty() const { return length_ == 0; } + + void clear() { ptr_ = NULL; length_ = 0; } + void set(const char* data, int len) { ptr_ = data; length_ = len; } + void set(const char* str) { + ptr_ = str; + if (str != NULL) + length_ = static_cast(strlen(str)); + else + length_ = 0; + } + void set(const void* data, int len) { + ptr_ = reinterpret_cast(data); + length_ = len; + } + + char operator[](int i) const { return ptr_[i]; } + + void remove_prefix(int n) { + ptr_ += n; + length_ -= n; + } + + void remove_suffix(int n) { + length_ -= n; + } + + int compare(const StringPiece& x) const { + int r = memcmp(ptr_, x.ptr_, std::min(length_, x.length_)); + if (r == 0) { + if (length_ < x.length_) r = -1; + else if (length_ > x.length_) r = +1; + } + return r; + } + + std::string as_string() const { + return std::string(data(), size()); + } + // We also define ToString() here, since many other string-like + // interfaces name the routine that converts to a C++ string + // "ToString", and it's confusing to have the method that does that + // for a StringPiece be called "as_string()". We also leave the + // "as_string()" method defined here for existing code. + std::string ToString() const { + return std::string(data(), size()); + } + + void CopyToString(std::string* target) const; + void AppendToString(std::string* target) const; + + // Does "this" start with "x" + bool starts_with(const StringPiece& x) const { + return ((length_ >= x.length_) && + (memcmp(ptr_, x.ptr_, x.length_) == 0)); + } + + // Does "this" end with "x" + bool ends_with(const StringPiece& x) const { + return ((length_ >= x.length_) && + (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); + } + + // standard STL container boilerplate + typedef char value_type; + typedef const char* pointer; + typedef const char& reference; + typedef const char& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + static const size_type npos; + typedef const char* const_iterator; + typedef const char* iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + iterator begin() const { return ptr_; } + iterator end() const { return ptr_ + length_; } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(ptr_ + length_); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(ptr_); + } + // STLS says return size_type, but Google says return int + int max_size() const { return length_; } + int capacity() const { return length_; } + + int copy(char* buf, size_type n, size_type pos = 0) const; + + int find(const StringPiece& s, size_type pos = 0) const; + int find(char c, size_type pos = 0) const; + int rfind(const StringPiece& s, size_type pos = npos) const; + int rfind(char c, size_type pos = npos) const; + + StringPiece substr(size_type pos, size_type n = npos) const; +}; + +} // namespace cxxmph + +bool operator==(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y); + +inline bool operator!=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { + return !(x == y); +} + +inline bool operator<(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { + const int r = memcmp(x.data(), y.data(), + std::min(x.size(), y.size())); + return ((r < 0) || ((r == 0) && (x.size() < y.size()))); +} + +inline bool operator>(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { + return y < x; +} + +inline bool operator<=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { + return !(x > y); +} + +inline bool operator>=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) { + return !(x < y); +} + +// allow StringPiece to be logged +extern std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece); + +#endif // CXXMPH_STRINGPIECE_H__ diff --git a/cxxmph/trigraph.cc b/cxxmph/trigraph.cc index 89b6721..63c36e1 100644 --- a/cxxmph/trigraph.cc +++ b/cxxmph/trigraph.cc @@ -1,3 +1,4 @@ +#include #include #include "trigraph.h" @@ -8,17 +9,51 @@ namespace { static const cmph_uint8 kInvalidEdge = std::numeric_limits::max(); } +namespace cxxmph { + TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices) : nedges_(0), edges_(nedges), first_edge_(nvertices, kInvalidEdge), vertex_degree_(nvertices, 0) { } -void TriGraph::ExtractEdgesAndClear(vector* edges) { +void TriGraph::ExtractEdgesAndClear(vector* edges) { + vector().swap(next_edge_); vector().swap(first_edge_); vector().swap(vertex_degree_); nedges_ = 0; edges->swap(edges_); } -void TriGraph::AddEdge(const Edge& edge) { } -void TriGraph::RemoveEdge(cmph_uint32 current_edge) { } +void TriGraph::AddEdge(const Edge& edge) { + edges_[nedges_] = edge; + next_edge_[nedges_] = Edge( + first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]); + first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_; + ++vertex_degree_[edge[0]]; + ++vertex_degree_[edge[1]]; + ++vertex_degree_[edge[2]]; + ++nedges_; +} + +void TriGraph::RemoveEdge(cmph_uint32 current_edge) { + cmph_uint32 vertex, edge1, edge2; + for (int i = 0; i < 3; ++i) { + cmph_uint32 vertex = edges_[current_edge][i]; + cmph_uint32 edge1 = first_edge_[vertex]; + cmph_uint32 edge2 = kInvalidEdge; + cmph_uint32 j = 0; + while (edge1 != current_edge && edge1 != kInvalidEdge) { + edge2 = edge1; + if (edges_[edge1][0] == vertex) j = 0; + else if (edges_[edge1][1] == vertex) j = 1; + else j = 2; + edge1 = next_edge_[edge1][j]; + } + assert(edge1 != kInvalidEdge); + if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i]; + else first_edge_[vertex] = next_edge_[edge1][i]; + --vertex_degree_[vertex]; + } +} + +} // namespace cxxmph diff --git a/cxxmph/trigraph.h b/cxxmph/trigraph.h index e4f8440..9d60151 100644 --- a/cxxmph/trigraph.h +++ b/cxxmph/trigraph.h @@ -1,26 +1,43 @@ +#ifndef __CXXMPH_TRIGRAPH_H__ +#define __CXXMPH_TRIGRAPH_H__ +// Build a trigraph using a memory efficient representation. +// +// Prior knowledge of the number of edges and vertices for the graph is +// required. For each vertex, we store how many edges touch it (degree) and the +// index of the first edge in the vector of triples representing the edges. + + #include #include "../src/cmph_types.h" +namespace cxxmph { + class TriGraph { struct Edge { Edge() { } Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2); + cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; } + const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; } cmph_uint32 vertices[3]; }; - struct ConnectedEdge { - Edge current; - Edge next; - }; - TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices); void AddEdge(const Edge& edge); - void RemoveEdge(cmph_uint32 current_edge); - void ExtractEdgesAndClear(std::vector* edges); + void RemoveEdge(cmph_uint32 edge_id); + void ExtractEdgesAndClear(std::vector* edges); + + const std::vector& edges() const { return edges_; } + const std::vector& vertex_degree() const { return vertex_degree_; } + const std::vector& first_edge() const { return first_edge_; } private: - cmph_uint32 nedges_; - std::vector edges_; - std::vector first_edge_; - std::vector vertex_degree_; + cmph_uint32 nedges_; // total number of edges + std::vector edges_; + std::vector next_edge_; // for implementing removal + std::vector first_edge_; // the first edge for this vertex + std::vector vertex_degree_; // number of edges for this vertex }; + +} // namespace cxxmph + +#endif // __CXXMPH_TRIGRAPH_H__