Added murmur hash and finished porting all C code.

This commit is contained in:
Davi de Castro Reis 2010-10-24 19:12:47 -07:00
parent bf0c5892d8
commit 724e716d67
10 changed files with 569 additions and 104 deletions

View File

@ -1,8 +1,11 @@
bin_PROGRAMS = cmph_hash_map_test bin_PROGRAMS = cmph_hash_map_test mphtable_test
lib_LTLIBRARIES = libcxxmph.la lib_LTLIBRARIES = libcxxmph.la
libcxxmph_la_SOURCES = trigragh.h trigraph.cc libcxxmph_la_SOURCES = stringpiece.h MurmurHash2.h randomly_seeded_hash.h trigragh.h trigraph.cc mphtable.h mphtable.cc
libcxxmph_la_LDFLAGS = -version-info 0:0:0 libcxxmph_la_LDFLAGS = -version-info 0:0:0
cmph_hash_map_test_LDADD = libcxxmph.la cmph_hash_map_test_LDADD = libcxxmph.la
cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc
mphtable_test_LDADD = libcxxmph.la
mphtable_test_SOURCES = mphtable_test.cc

64
cxxmph/MurmurHash2.h Normal file
View File

@ -0,0 +1,64 @@
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
//
// Hashes the `len` bytes starting at `key` into a 32-bit value, mixing in
// `seed` so that different seeds yield different hash functions.
//
// Note - This code makes one assumption about how your machine behaves -
// 1. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
//
// The function is `inline` because it is defined in a header: without it,
// every translation unit including this file would emit its own external
// definition and the link would fail with duplicate symbols.
inline unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value
  unsigned int h = seed ^ len;

  // Mix 4 bytes at a time into the hash
  const unsigned char * data = static_cast<const unsigned char *>(key);
  while (len >= 4)
  {
    // Copy the next word byte by byte instead of dereferencing a casted
    // pointer: `key` may be misaligned and is usually not an `unsigned int`
    // object, so a direct 4-byte load would be undefined behaviour. Copying
    // in memory order keeps the native-endian value the original load had.
    unsigned int k;
    unsigned char * k_bytes = reinterpret_cast<unsigned char *>(&k);
    k_bytes[0] = data[0];
    k_bytes[1] = data[1];
    k_bytes[2] = data[2];
    k_bytes[3] = data[3];

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array
  switch (len)
  {
    case 3: h ^= data[2] << 16;  // fall through
    case 2: h ^= data[1] << 8;   // fall through
    case 1: h ^= data[0];
            h *= m;
  }

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;
  return h;
}

View File

@ -2,8 +2,6 @@
#include <vector> #include <vector>
#include <utility> // for std::pair #include <utility> // for std::pair
#include <cmph.h>
// Save on repetitive typing. // Save on repetitive typing.
#define CMPH_TMPL_SPEC template <class Key, class Data, class HashFcn, class EqualKey, class Alloc> #define CMPH_TMPL_SPEC template <class Key, class Data, class HashFcn, class EqualKey, class Alloc>
#define CMPH_CLASS_SPEC cmph_hash_map<Key, Data, HashFcn, EqualKey, Alloc> #define CMPH_CLASS_SPEC cmph_hash_map<Key, Data, HashFcn, EqualKey, Alloc>

View File

@ -1,105 +1,213 @@
#include <numerical_limits> #include <limits>
#include "mphtable.h" #include "mphtable.h"
using std::vector; using std::vector;
namespace cxxmph {
template <class Key, class HashFcn> template <class Key, class HashFcn>
template <class ForwardIterator> template <class ForwardIterator>
bool MPHTable<Key, HashFcn>::Reset(ForwardIterator begin, ForwardIterator end) {
void MPHTable::Reset(ForwardIterator begin, ForwardIterator end) { TableBuilderState<ForwardIterator> st;
TableBuilderState st; m_ = end - begin;
st.c = 1.23; r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3));
st.b = 7; if (r_ % 2) == 0) r_ += 1;
st.m = end - begin; n_ = 3*r_;
st.r = static_cast<cmph_uint32>(ceil((st.c*st.m)/3)); k_ = 1U << b_;
if ((st.r % 2) == 0) st.r += 1;
st.n = 3*st.r;
st.k = 1U << st.b;
st.ranktablesize = static_cast<cmph_uint32>(
ceil(st.n / static_cast<double>(st.k)));
st.graph_builder = TriGraph(st.m, st.n); // giant copy
st.edges_queue.resize(st.m)
int iterations = 1000; int iterations = 1000;
while (1) { while (1) {
hasher hasher0 = HashFcn(); for (int i = 0; i < 3; ++i) hash_function_[i] = hasher();
ok = Mapping(st.graph_builder, st.edges_queue); vector<Edge> edges;
if (ok) break; vector<cmph_uint32> queue;
if (Mapping(begin, end, &edges, &queue)) break;
else --iterations; else --iterations;
if (iterations == 0) break; if (iterations == 0) break;
} }
if (iterations == 0) return false; if (iterations == 0) return false;
vector<ConnectedEdge> graph; vector<Edge>& edges;
st.graph_builder.ExtractEdgesAndClear(&graph); graph->ExtractEdgesAndClear(&edges);
Assigning(graph, st.edges_queue); Assigning(queue, edges);
vector<cmph_uint32>().swap(st.edges_queue); vector<cmph_uint32>().swap(edges);
Ranking(graph); Ranking();
} }
template <class Key, class HashFcn> template <class Key, class HashFcn>
int MPHTable::GenerateQueue( bool MPHTable<Key, HashFcn>::GenerateQueue(
cmph_uint32 nedges, cmph_uint32 nvertices, TriGraph* graph, vector<cmph_uint32>* queue_output) {
TriGraph* graph, Queue* queue) {
cmph_uint32 queue_head = 0, queue_tail = 0; cmph_uint32 queue_head = 0, queue_tail = 0;
cmph_uint32 nedges = n_;
cmph_uint32 nvertices = m_;
// Relies on vector<bool> using 1 bit per element // Relies on vector<bool> using 1 bit per element
vector<bool> marked_edge((nedges >> 3) + 1, false); vector<bool> marked_edge((nedges >> 3) + 1, false);
queue->swap(Queue(nvertices, 0)); Queue queue(nvertices, 0);
for (int i = 0; i < nedges; ++i) { for (int i = 0; i < nedges; ++i) {
TriGraph::Edge e = graph.edges[i].vertices; const TriGraph::Edge& e = graph->edges()[i];
if (graph.vertex_degree_[e.vertices[0]] == 1 || if (graph->vertex_degree()[e[0]] == 1 ||
graph.vertex_degree_[e.vertices[1]] == 1 || graph->vertex_degree()[e[1]] == 1 ||
graph.vertex_degree[e.vertices[2]] == 1) { graph->vertex_degree()[e[2]] == 1) {
if (!marked_edge[i]) { if (!marked_edge[i]) {
(*queue)[queue_head++] = i; queue[queue_head++] = i;
marked_edge[i] = true; marked_edge[i] = true;
} }
} }
} }
while (queue_tail != queue_head) { while (queue_tail != queue_head) {
cmph_uint32 current_edge = (*queue)[queue_tail++]; cmph_uint32 current_edge = queue[queue_tail++];
graph->RemoveEdge(current_edge); graph->RemoveEdge(current_edge);
TriGraph::Edge e = graph->edges[current_edge]; const TriGraph::Edge& e = graph->edges()[current_edge];
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
cmph_uint32 v = e.vertices[i]; cmph_uint32 v = e[i];
if (graph->vertex_degree[v] == 1) { if (graph->vertex_degree()[v] == 1) {
cmph_uint32 first_edge = graph->first_edge_[v]; cmph_uint32 first_edge = graph->first_edge()[v];
if (!marked_edge[first_edge) { if (!marked_edge[first_edge]) {
queue[queue_head++] = first_edge; queue[queue_head++] = first_edge;
marked_edge[first_edge] = true; marked_edge[first_edge] = true;
} }
} }
} }
} }
vector<bool>().swap(marked_edge); int cycles = queue_head - nedges;
return queue_head - nedges; if (cycles == 0) queue.swap(*queue_output);
}
template <class Key, class HashFcn>
int MPHTable::Mapping(TriGraph* graph, Queue* queue) {
int cycles = 0;
graph->Reset(m, n);
for (ForwardIterator it = begin_; it != end_; ++it) {
cmph_uint32 hash_values[3];
for (int i = 0; i < 3; ++i) {
hash_values[i] = hasher_(*it);
}
cmph_uint32 v0 = hash_values[0] % bdz->r;
cmph_uint32 v1 = hash_values[1] % bdz->r + bdz->r;
cmph_uint32 v2 = hash_values[2] % bdz->r + (bdz->r << 1);
graph->AddEdge(Edge(v0, v1, v2));
}
cycles = GenerateQueue(bdz->m, bdz->n, queue, graph);
return cycles == 0; return cycles == 0;
} }
void MPHTable::Assigning(TriGraph* graph, Queue* queue) { template <class Key, class HashFcn>
template <class ForwardIterator>
bool MPHTable<Key, HashFcn>::Mapping(
ForwardIterator begin, ForwardIterator end,
vector<Edge>* edges, vector<cmph_uint32> queue) {
int cycles = 0;
TriGraph graph(m, n);
for (ForwardIterator it = begin; it != end; ++it) {
cmph_uint32 h[3];
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
cmph_uint32 v0 = h[0] % r_;
cmph_uint32 v1 = h[1] % r_ + r_;
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
graph.AddEdge(Edge(v0, v1, v2));
} }
void MPHTable::Ranking(TriGraph* graph, Queue* queue) { if (GenerateQueue(&graph, queue)) {
graph.ExtractEdgesAndClear(edges);
return true;
} }
cmph_uint32 MPHTable::Search(const key_type& key) { return false;
} }
cmph_uint32 MPHTable::Rank(const key_type& key) { template <class Key, class HashFcn>
void MPHTable<Key, HashFcn>::Assigning(
const vector<Edge>& edges, const vector<cmph_uint32>& queue) {
cmph_uint32 nedges = n_;
cmph_uint32 current_edge = 0;
vector<bool> marked_vertices(nedges + 1);
// TODO(davi) use half nibbles instead
// vector<cmph_uint8> g(static_cast<cmph_uint32>(ceil(nedges / 4.0)),
// std::numerical_limits<cmph_uint8>::max());
static const cmph_uint8 kUnassigned = 3;
vector<cmph_uint8>(nedges, kUnassigned).swap(g_);
for (int i = nedges - 1; i + 1 >= 1; --i) {
current_edge = queue[i];
const TriGraph::Edge& e = edges[current_edge];
if (!marked_vertices[e[0]]) {
if (!marked_vertices[e[1]]) {
g_[e[1]] = kUnassigned;
marked_vertices[e[1]] = true;
} }
if (!marked_vertices[e[2]]) {
g_[e[2]] = kUnassigned;
marked_vertices[e[2]] = true;
}
g_[e[0]] = (6 - g_[e[1]] + g_[e2]) % 3;
marked_vertices[e[0]] = true;
} else if (!marked_vertices[e[1]])) {
if (!marked_vertices[e[2]])) {
g_[e[2]] = kUnassigned;
marked_vertices[e[2]] = true;
}
g_[e[1]] = 7 - (g_[e[0]] + g_[e[2]]) % 3;
marked_vertices[e[1]] = true;
} else {
g_[e[2]] = (8 - g_[e[0]] + g_[e[1]]) % 3;
marked_vertices[e[2]] = true;
}
}
}
// Table used for looking up the number of assigned vertices in an 8-bit
// integer. The index is read as four 2-bit fields; the entry is how many of
// those fields differ from 3 (binary 11), the value Assigning() uses for
// "unassigned". Hence entry 0x00 is 4 and entry 0xFF is 0.
// The table is read-only, so it is declared const.
static const cmph_uint8 kBdzLookupTable[] =
{
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
  2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
};
// Builds ranktable_, the sampled prefix counts used by Rank().
//
// The vertices are split into consecutive blocks of k_ vertices each;
// ranktable_[i] receives the number of assigned vertices (g_ value different
// from kUnassigned) that precede block i, so ranktable_[0] is always 0.
//
// g_ is scanned one entry per vertex because that is how Assigning() fills it
// (one cmph_uint8 per vertex). Indexing it as packed 2-bit fields through
// kBdzLookupTable is only valid once the packing TODO is done.
template <class Key, class HashFcn>
void MPHTable<Key, HashFcn>::Ranking() {
  // Same sentinel Assigning() stores for vertices without a value.
  static const cmph_uint8 kUnassigned = 3;

  const cmph_uint32 ranktable_size =
      static_cast<cmph_uint32>(ceil(n_ / static_cast<double>(k_)));
  // TODO(davi) Change swap of member classes for resize + memset to avoid fragmentation
  vector<cmph_uint32>(ranktable_size).swap(ranktable_);

  const cmph_uint32 nvertices = static_cast<cmph_uint32>(g_.size());
  cmph_uint32 count = 0;   // assigned vertices seen so far
  cmph_uint32 vertex = 0;  // next vertex to inspect
  for (cmph_uint32 i = 0; i < ranktable_size; ++i) {
    // Record the running total before consuming block i.
    ranktable_[i] = count;
    // The last block may be shorter than k_ vertices.
    cmph_uint32 block_end = (i + 1) * k_;
    if (block_end > nvertices) block_end = nvertices;
    for (; vertex < block_end; ++vertex) {
      if (g_[vertex] != kUnassigned) ++count;
    }
  }
}
// Maps `key` to its minimal perfect hash value.
//
// The three seeded hash functions select one vertex in each third of the
// vertex range ([0, r_), [r_, 2*r_) and [2*r_, 3*r_)), the same mapping that
// Mapping() uses when it builds the graph. The sum of the g_ values of those
// vertices, modulo 3, picks which of the three vertices belongs to the key;
// Rank() then turns that vertex into a dense index.
//
// NOTE(review): this method is const, so hasher::operator() must be callable
// on a const object.
template <class Key, class HashFcn>
cmph_uint32 MPHTable<Key, HashFcn>::Search(const key_type& key) const {
  cmph_uint32 h[3];
  for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
  h[0] = h[0] % r_;
  h[1] = h[1] % r_ + r_;
  h[2] = h[2] % r_ + (r_ << 1);
  const cmph_uint32 vertex = h[(g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3];
  return Rank(vertex);
}
// Returns the number of assigned vertices that precede `vertex`.
//
// ranktable_[vertex >> b_] supplies the count of assigned vertices before the
// block of 2^b_ vertices that contains `vertex`; the remaining vertices of
// that block up to (but excluding) `vertex` are counted directly.
//
// g_ is read one entry per vertex, matching how Assigning() and Search() index
// it. The byte-at-a-time shortcut through kBdzLookupTable only applies once
// g_ is packed as 2-bit fields.
template <class Key, class HashFcn>
cmph_uint32 MPHTable<Key, HashFcn>::Rank(cmph_uint32 vertex) const {
  // Same sentinel Assigning() stores for vertices without a value.
  static const cmph_uint8 kUnassigned = 3;

  const cmph_uint32 index = vertex >> b_;
  cmph_uint32 base_rank = ranktable_[index];
  for (cmph_uint32 v = index << b_; v < vertex; ++v) {
    if (g_[v] != kUnassigned) ++base_rank;
  }
  return base_rank;
}
// Public lookup: forwards `key` to Search() and hands back its result
// unchanged.
template <class Key, class HashFcn>
cmph_uint32 MPHTable<Key, HashFcn>::index(const key_type& key) const {
  const cmph_uint32 position = Search(key);
  return position;
}
} // namespace cxxmph

View File

@ -1,15 +1,22 @@
#ifndef __CXXMPH_MPHTABLE_H__
#define __CXXMPH_MPHTABLE_H__
// Minimal perfect hash abstraction implementing the BDZ algorithm // Minimal perfect hash abstraction implementing the BDZ algorithm
#include <vector> #include <vector>
#include "randomly_seeded_hash.h"
#include "stringpiece.h"
#include "trigraph.h" #include "trigraph.h"
template <class Key, class NewRandomlySeededHashFcn = __gnu_cxx::hash<Key> > namespace cxxmph {
template <class Key, class NewRandomlySeededHashFcn = RandomlySeededMurmur2>
class MPHTable { class MPHTable {
public: public:
typedef Key key_type; typedef Key key_type;
typedef NewRandomlySeededHashFcn hasher; typedef NewRandomlySeededHashFcn hasher;
MPHTable(); MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
~MPHTable(); ~MPHTable();
template <class ForwardIterator> template <class ForwardIterator>
@ -17,28 +24,38 @@ class MPHTable {
cmph_uint32 index(const key_type& x) const; cmph_uint32 index(const key_type& x) const;
private: private:
typedef std::vector<cmph_uint32> Queue;
template <class ForwardIterator> template <class ForwardIterator>
struct TableBuilderState { bool Mapping(ForwardIterator begin, ForwardIterator end,
ForwardIterator begin; vector<Edge>* edges, vector<cmph_uint32> queue);
ForwardIterator end; bool GenerateQueue(TriGraph* graph, vector<cmph_uint32>* queue);
Queue edges_queue; void Assigning(TriGraph* graph_builder, Queue* queue);
TriGraph graph_builder; void Ranking(TriGraph* graph_builder, Queue* queue);
double c;
cmph_uint32 m;
cmph_uint32 n;
cmph_uint32 k;
cmph_uint32 ranktablesize;
};
int GenerateQueue(
cmph_uint32 nedges, cmph_uint32 nvertices,
TriGraph* graph, Queue* queue);
void Assigning(TriGraph* graph, Queue* queue);
void Ranking(TriGraph* graph, Queue* queue);
cmph_uint32 Search(const StringPiece& key); cmph_uint32 Search(const StringPiece& key);
cmph_uint32 Rank(const StringPiece& key); cmph_uint32 Rank(const StringPiece& key);
std::vector<ConnectedEdge> graph_; // Algorithm parameters
cmph_uint8 b_; // Number of bits of the kth index in the ranktable
double c_; // Number of bits per key (? is it right)
// Values used during generation
cmph_uint32 m_; // edges count
cmph_uint32 n_; // vertex count
cmph_uint32 k_ // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
// Values used during search
// Partition vertex count, derived from c parameter.
cmph_uint32 r_;
// The array containing the minimal perfect hash function graph.
std::vector<cmph_uint8> g_;
// The table used for the rank step of the minimal perfect hash function
std::vector<cmph_uint32> ranktable_;
// The selected hash function triplet for finding the edges in the minimal
// perfect hash function graph.
hasher hash_function_[3];
}; };
} // namespace cxxmph
#define // __CXXMPH_MPHTABLE_H__

22
cxxmph/mphtable_test.cc Normal file
View File

@ -0,0 +1,22 @@
#include <algorithm>
#include <cassert>
#include <vector>
#include "mphtable.h"
using std::vector;
using cxxmph::MPHTable;
int main(int argc, char** argv) {
vector<int> keys;
keys.push_back(10);
keys.push_back(4);
keys.push_back(3);
MPHTable<int> mphtable;
assert(mphtable.Reset(keys.begin(), keys.end()));
vector<int> ids;
for (int i = 0; i < keys.size(); ++i) ids.push_back(mphtable.index(keys[i]));
sort(ids.begin(), ids.end());
for (int i = 0; i < ids.size(); ++i) assert(ids[i] == i);
}

View File

@ -0,0 +1,24 @@
#ifndef __CXXMPH_RANDOMLY_SEEDED_HASH__
#define __CXXMPH_RANDOMLY_SEEDED_HASH__
// Helper to create randomly seeded hash functions out of existing hash
// functions that take a seed as a parameter.
#include <cstdlib>
#include "../src/cmph_types.h"
#include "MurmurHash2.h"
namespace cxxmph {
// Hash functor that runs MurmurHash2 with a seed drawn from random() when the
// object is constructed, so every default-constructed instance is (very
// likely) a different hash function.
//
// NOTE(review): StringPiece is used here but none of the headers included
// above in this file declares it; confirm stringpiece.h is included first.
struct RandomlySeededMurmur2 {
  // The constructor must carry the struct's own name; any other name is
  // parsed as a function without a return type and does not compile.
  RandomlySeededMurmur2() : seed(static_cast<cmph_uint32>(random())) { }
  // Hashes the bytes viewed by `key`. Declared const so that it can be
  // invoked from const member functions of the table.
  cmph_uint32 operator()(const StringPiece& key) const {
    return MurmurHash2(key.data(), key.length(), seed);
  }
  cmph_uint32 seed;  // seed passed to MurmurHash2 on every call
};
} // namespace cxxmph
#endif // __CXXMPH_RANDOMLY_SEEDED_HASH__

177
cxxmph/stringpiece.h Normal file
View File

@ -0,0 +1,177 @@
// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// A string-like object that points to a sized piece of memory.
//
// Functions or methods may use const StringPiece& parameters to accept either
// a "const char*" or a "string" value that will be implicitly converted to
// a StringPiece. The implicit conversion means that it is often appropriate
// to include this .h file in other files rather than forward-declaring
// StringPiece as would be appropriate for most other Google classes.
//
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
// conversions from "const char*" to "string" and back again.
//
//
// Arghh! I wish C++ literals were "string".
#ifndef CXXMPH_STRINGPIECE_H__
#define CXXMPH_STRINGPIECE_H__
#include <string.h>
#include <iosfwd>
#include <string>
namespace cxxmph {
// A non-owning view of `length_` chars starting at `ptr_`. The viewed memory
// is never copied or freed by this class.
class StringPiece {
 private:
  const char* ptr_;  // first viewed char; NULL for a default/cleared piece
  int length_;       // number of viewed chars

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.
  StringPiece() : ptr_(NULL), length_(0) { }
  StringPiece(const char* str)
      : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
  StringPiece(const std::string& str)
      : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated. Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  int length() const { return length_; }
  bool empty() const { return length_ == 0; }

  void clear() { ptr_ = NULL; length_ = 0; }
  void set(const char* data, int len) { ptr_ = data; length_ = len; }
  // Points the piece at a NUL-terminated string; NULL yields an empty piece.
  void set(const char* str) {
    ptr_ = str;
    if (str != NULL)
      length_ = static_cast<int>(strlen(str));
    else
      length_ = 0;
  }
  void set(const void* data, int len) {
    ptr_ = reinterpret_cast<const char*>(data);
    length_ = len;
  }

  // No bounds checking is performed on `i`.
  char operator[](int i) const { return ptr_[i]; }

  // Drops the first `n` chars from the view; `n` is not range checked.
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drops the last `n` chars from the view; `n` is not range checked.
  void remove_suffix(int n) {
    length_ -= n;
  }

  // Three-way lexicographic comparison by unsigned byte value: negative, zero
  // or positive when *this sorts before, equal to or after `x`. When one piece
  // is a prefix of the other, the shorter one sorts first.
  int compare(const StringPiece& x) const {
    const int common = (length_ < x.length_) ? length_ : x.length_;
    // Skip memcmp() when there is nothing to compare: an empty piece may hold
    // a NULL pointer, which memcmp() must not receive even for zero bytes.
    int r = (common > 0) ? memcmp(ptr_, x.ptr_, common) : 0;
    if (r == 0) {
      if (length_ < x.length_) r = -1;
      else if (length_ > x.length_) r = +1;
    }
    return r;
  }

  std::string as_string() const {
    return std::string(data(), size());
  }
  // We also define ToString() here, since many other string-like
  // interfaces name the routine that converts to a C++ string
  // "ToString", and it's confusing to have the method that does that
  // for a StringPiece be called "as_string()". We also leave the
  // "as_string()" method defined here for existing code.
  std::string ToString() const {
    return std::string(data(), size());
  }

  void CopyToString(std::string* target) const;
  void AppendToString(std::string* target) const;

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    // An empty `x` is a prefix of everything; checking that first keeps
    // possibly-NULL pointers away from memcmp().
    return ((length_ >= x.length_) &&
            (x.length_ == 0 || memcmp(ptr_, x.ptr_, x.length_) == 0));
  }

  // Does "this" end with "x"
  bool ends_with(const StringPiece& x) const {
    // Same empty-suffix guard as in starts_with().
    return ((length_ >= x.length_) &&
            (x.length_ == 0 ||
             memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0));
  }

  // standard STL container boilerplate
  typedef char value_type;
  typedef const char* pointer;
  typedef const char& reference;
  typedef const char& const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  static const size_type npos;
  typedef const char* const_iterator;
  typedef const char* iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;
  iterator begin() const { return ptr_; }
  iterator end() const { return ptr_ + length_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(ptr_ + length_);
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(ptr_);
  }
  // STLS says return size_type, but Google says return int
  int max_size() const { return length_; }
  int capacity() const { return length_; }

  // Declared here; the definitions are not part of this header.
  int copy(char* buf, size_type n, size_type pos = 0) const;
  int find(const StringPiece& s, size_type pos = 0) const;
  int find(char c, size_type pos = 0) const;
  int rfind(const StringPiece& s, size_type pos = npos) const;
  int rfind(char c, size_type pos = npos) const;
  StringPiece substr(size_type pos, size_type n = npos) const;
};
} // namespace cxxmph
// Relational operators for cxxmph::StringPiece.
// operator== is only declared here; its definition is not part of this header.
bool operator==(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y);

inline bool operator!=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return !(x == y);
}

// Lexicographic ordering by unsigned byte value; when one piece is a prefix of
// the other, the shorter one sorts first.
inline bool operator<(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  const int common = (x.size() < y.size()) ? x.size() : y.size();
  // Skip memcmp() when there is nothing to compare: an empty piece may hold a
  // NULL data pointer, which memcmp() must not receive even for zero bytes.
  const int r = (common > 0) ? memcmp(x.data(), y.data(), common) : 0;
  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
}

// The remaining orderings are all derived from operator<.
inline bool operator>(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return y < x;
}

inline bool operator<=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return !(x > y);
}

inline bool operator>=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return !(x < y);
}

// allow StringPiece to be logged
extern std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece);
#endif // CXXMPH_STRINGPIECE_H__

View File

@ -1,3 +1,4 @@
#include <cassert>
#include <limits> #include <limits>
#include "trigraph.h" #include "trigraph.h"
@ -8,17 +9,51 @@ namespace {
static const cmph_uint8 kInvalidEdge = std::numeric_limits<cmph_uint8>::max(); static const cmph_uint8 kInvalidEdge = std::numeric_limits<cmph_uint8>::max();
} }
namespace cxxmph {
TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices) TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices)
: nedges_(0), : nedges_(0),
edges_(nedges), edges_(nedges),
first_edge_(nvertices, kInvalidEdge), first_edge_(nvertices, kInvalidEdge),
vertex_degree_(nvertices, 0) { } vertex_degree_(nvertices, 0) { }
void TriGraph::ExtractEdgesAndClear(vector<ConnectedEdge>* edges) { void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
vector<Edge>().swap(next_edge_);
vector<cmph_uint32>().swap(first_edge_); vector<cmph_uint32>().swap(first_edge_);
vector<cmph_uint8>().swap(vertex_degree_); vector<cmph_uint8>().swap(vertex_degree_);
nedges_ = 0; nedges_ = 0;
edges->swap(edges_); edges->swap(edges_);
} }
void TriGraph::AddEdge(const Edge& edge) { } void TriGraph::AddEdge(const Edge& edge) {
void TriGraph::RemoveEdge(cmph_uint32 current_edge) { } edges_[nedges_] = edge;
next_edge_[nedges_] = Edge(
first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
++vertex_degree_[edge[0]];
++vertex_degree_[edge[1]];
++vertex_degree_[edge[2]];
++nedges_;
}
// Unlinks edge `current_edge` from the edge list of each of its three
// vertices and decrements the degree of those vertices.
//
// Every vertex has a singly linked list of the edges touching it: the list
// starts at first_edge_[vertex] and continues through
// next_edge_[e][slot], where `slot` is the position of the vertex inside edge
// `e`. kInvalidEdge terminates a list. The edge itself stays in edges_.
void TriGraph::RemoveEdge(cmph_uint32 current_edge) {
  for (int i = 0; i < 3; ++i) {
    const cmph_uint32 vertex = edges_[current_edge][i];
    cmph_uint32 cursor = first_edge_[vertex];  // edge being inspected
    cmph_uint32 previous = kInvalidEdge;       // edge visited just before it
    cmph_uint32 previous_slot = 0;  // position of `vertex` inside `previous`
    // Walk this vertex's list until the edge to remove is reached.
    while (cursor != current_edge && cursor != kInvalidEdge) {
      previous = cursor;
      if (edges_[cursor][0] == vertex) previous_slot = 0;
      else if (edges_[cursor][1] == vertex) previous_slot = 1;
      else previous_slot = 2;
      cursor = next_edge_[cursor][previous_slot];
    }
    // The edge must be present in the list of each of its own vertices.
    assert(cursor != kInvalidEdge);
    // Splice the edge out: its successor for this vertex is stored in slot i.
    if (previous != kInvalidEdge) {
      next_edge_[previous][previous_slot] = next_edge_[cursor][i];
    } else {
      first_edge_[vertex] = next_edge_[cursor][i];
    }
    --vertex_degree_[vertex];
  }
}
} // namespace cxxmph

View File

@ -1,26 +1,43 @@
#ifndef __CXXMPH_TRIGRAPH_H__
#define __CXXMPH_TRIGRAPH_H__
// Build a trigraph using a memory efficient representation.
//
// Prior knowledge of the number of edges and vertices for the graph is
// required. For each vertex, we store how many edges touch it (degree) and the
// index of the first edge in the vector of triples representing the edges.
#include <vector> #include <vector>
#include "../src/cmph_types.h" #include "../src/cmph_types.h"
namespace cxxmph {
class TriGraph { class TriGraph {
struct Edge { struct Edge {
Edge() { } Edge() { }
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2); Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; }
const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; }
cmph_uint32 vertices[3]; cmph_uint32 vertices[3];
}; };
struct ConnectedEdge {
Edge current;
Edge next;
};
TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices); TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices);
void AddEdge(const Edge& edge); void AddEdge(const Edge& edge);
void RemoveEdge(cmph_uint32 current_edge); void RemoveEdge(cmph_uint32 edge_id);
void ExtractEdgesAndClear(std::vector<ConnectedEdge>* edges); void ExtractEdgesAndClear(std::vector<Edge>* edges);
const std::vector<Edge>& edges() const { return edges_; }
const std::vector<cmph_uint8>& vertex_degree() const { return vertex_degree_; }
const std::vector<cmph_uint32>& first_edge() const { return first_edge_; }
private: private:
cmph_uint32 nedges_; cmph_uint32 nedges_; // total number of edges
std::vector<ConnectedEdge> edges_; std::vector<Edge> edges_;
std::vector<cmph_uint32> first_edge_; std::vector<Edge> next_edge_; // for implementing removal
std::vector<cmph_uint8> vertex_degree_; std::vector<cmph_uint32> first_edge_; // the first edge for this vertex
std::vector<cmph_uint8> vertex_degree_; // number of edges for this vertex
}; };
} // namespace cxxmph
#endif // __CXXMPH_TRIGRAPH_H__