Added murmur hash and finished porting all c code.
This commit is contained in:
parent
bf0c5892d8
commit
724e716d67
@ -1,8 +1,11 @@
|
||||
bin_PROGRAMS = cmph_hash_map_test
|
||||
bin_PROGRAMS = cmph_hash_map_test mphtable_test
|
||||
lib_LTLIBRARIES = libcxxmph.la
|
||||
|
||||
libcxxmph_la_SOURCES = trigragh.h trigraph.cc
|
||||
# Sources of the cxxmph library; the graph header is trigraph.h (matches the
# #include "trigraph.h" used by mphtable.h and trigraph.cc).
libcxxmph_la_SOURCES = stringpiece.h MurmurHash2.h randomly_seeded_hash.h trigraph.h trigraph.cc mphtable.h mphtable.cc
|
||||
libcxxmph_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
cmph_hash_map_test_LDADD = libcxxmph.la
|
||||
cmph_hash_map_test_SOURCES = cmph_hash_map_test.cc
|
||||
|
||||
mphtable_test_LDADD = libcxxmph.la
|
||||
mphtable_test_SOURCES = mphtable_test.cc
|
||||
|
64
cxxmph/MurmurHash2.h
Normal file
64
cxxmph/MurmurHash2.h
Normal file
@ -0,0 +1,64 @@
|
||||
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
//
// Hashes the `len` bytes starting at `key` into a 32-bit value. `seed` is
// mixed into the initial state, so different seeds select different hash
// functions over the same input.
//
// Assumption about the target machine:
//
// 1. sizeof(int) == 4
//
// Limitations:
//
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines, because each 4-byte group is interpreted in native byte order.
//
// Each 4-byte group is copied byte by byte into a local word instead of being
// read through a casted pointer, so `key` does not need to be 4-byte aligned.
// The function is `inline` because it is defined in a header that may be
// included from several translation units.

inline unsigned int MurmurHash2(const void* key, int len, unsigned int seed)
{
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value.
  unsigned int h = seed ^ len;

  const unsigned char* data = static_cast<const unsigned char*>(key);

  // Mix 4 bytes at a time into the hash.
  while (len >= 4) {
    // Native-endian load of 4 bytes without assuming alignment.
    unsigned int k;
    unsigned char* k_bytes = reinterpret_cast<unsigned char*>(&k);
    k_bytes[0] = data[0];
    k_bytes[1] = data[1];
    k_bytes[2] = data[2];
    k_bytes[3] = data[3];

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array. Every case intentionally
  // falls through to the ones below it.
  switch (len) {
    case 3: h ^= data[2] << 16;  // fall through
    case 2: h ^= data[1] << 8;   // fall through
    case 1: h ^= data[0];
            h *= m;
  }

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
|
@ -2,8 +2,6 @@
|
||||
#include <vector>
|
||||
#include <utility> // for std::pair
|
||||
|
||||
#include <cmph.h>
|
||||
|
||||
// Save on repetitive typing.
|
||||
#define CMPH_TMPL_SPEC template <class Key, class Data, class HashFcn, class EqualKey, class Alloc>
|
||||
#define CMPH_CLASS_SPEC cmph_hash_map<Key, Data, HashFcn, EqualKey, Alloc>
|
||||
|
@ -1,105 +1,213 @@
|
||||
#include <numerical_limits>
|
||||
#include <limits>
|
||||
|
||||
#include "mphtable.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
template <class ForwardIterator>
|
||||
|
||||
void MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
TableBuilderState st;
|
||||
st.c = 1.23;
|
||||
st.b = 7;
|
||||
st.m = end - begin;
|
||||
st.r = static_cast<cmph_uint32>(ceil((st.c*st.m)/3));
|
||||
if ((st.r % 2) == 0) st.r += 1;
|
||||
st.n = 3*st.r;
|
||||
st.k = 1U << st.b;
|
||||
st.ranktablesize = static_cast<cmph_uint32>(
|
||||
ceil(st.n / static_cast<double>(st.k)));
|
||||
st.graph_builder = TriGraph(st.m, st.n); // giant copy
|
||||
st.edges_queue.resize(st.m)
|
||||
bool MPHTable<Key, HashFcn>::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
TableBuilderState<ForwardIterator> st;
|
||||
m_ = end - begin;
|
||||
r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3));
|
||||
if (r_ % 2) == 0) r_ += 1;
|
||||
n_ = 3*r_;
|
||||
k_ = 1U << b_;
|
||||
|
||||
int iterations = 1000;
|
||||
while (1) {
|
||||
hasher hasher0 = HashFcn();
|
||||
ok = Mapping(st.graph_builder, st.edges_queue);
|
||||
if (ok) break;
|
||||
for (int i = 0; i < 3; ++i) hash_function_[i] = hasher();
|
||||
vector<Edge> edges;
|
||||
vector<cmph_uint32> queue;
|
||||
if (Mapping(begin, end, &edges, &queue)) break;
|
||||
else --iterations;
|
||||
if (iterations == 0) break;
|
||||
}
|
||||
if (iterations == 0) return false;
|
||||
vector<ConnectedEdge> graph;
|
||||
st.graph_builder.ExtractEdgesAndClear(&graph);
|
||||
Assigning(graph, st.edges_queue);
|
||||
vector<cmph_uint32>().swap(st.edges_queue);
|
||||
Ranking(graph);
|
||||
vector<Edge>& edges;
|
||||
graph->ExtractEdgesAndClear(&edges);
|
||||
Assigning(queue, edges);
|
||||
vector<cmph_uint32>().swap(edges);
|
||||
Ranking();
|
||||
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
int MPHTable::GenerateQueue(
|
||||
cmph_uint32 nedges, cmph_uint32 nvertices,
|
||||
TriGraph* graph, Queue* queue) {
|
||||
bool MPHTable<Key, HashFcn>::GenerateQueue(
|
||||
TriGraph* graph, vector<cmph_uint32>* queue_output) {
|
||||
cmph_uint32 queue_head = 0, queue_tail = 0;
|
||||
cmph_uint32 nedges = n_;
|
||||
cmph_uint32 nvertices = m_;
|
||||
// Relies on vector<bool> using 1 bit per element
|
||||
vector<bool> marked_edge((nedges >> 3) + 1, false);
|
||||
queue->swap(Queue(nvertices, 0));
|
||||
Queue queue(nvertices, 0);
|
||||
for (int i = 0; i < nedges; ++i) {
|
||||
TriGraph::Edge e = graph.edges[i].vertices;
|
||||
if (graph.vertex_degree_[e.vertices[0]] == 1 ||
|
||||
graph.vertex_degree_[e.vertices[1]] == 1 ||
|
||||
graph.vertex_degree[e.vertices[2]] == 1) {
|
||||
const TriGraph::Edge& e = graph->edges()[i];
|
||||
if (graph->vertex_degree()[e[0]] == 1 ||
|
||||
graph->vertex_degree()[e[1]] == 1 ||
|
||||
graph->vertex_degree()[e[2]] == 1) {
|
||||
if (!marked_edge[i]) {
|
||||
(*queue)[queue_head++] = i;
|
||||
queue[queue_head++] = i;
|
||||
marked_edge[i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (queue_tail != queue_head) {
|
||||
cmph_uint32 current_edge = (*queue)[queue_tail++];
|
||||
cmph_uint32 current_edge = queue[queue_tail++];
|
||||
graph->RemoveEdge(current_edge);
|
||||
TriGraph::Edge e = graph->edges[current_edge];
|
||||
const TriGraph::Edge& e = graph->edges()[current_edge];
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
cmph_uint32 v = e.vertices[i];
|
||||
if (graph->vertex_degree[v] == 1) {
|
||||
cmph_uint32 first_edge = graph->first_edge_[v];
|
||||
if (!marked_edge[first_edge) {
|
||||
cmph_uint32 v = e[i];
|
||||
if (graph->vertex_degree()[v] == 1) {
|
||||
cmph_uint32 first_edge = graph->first_edge()[v];
|
||||
if (!marked_edge[first_edge]) {
|
||||
queue[queue_head++] = first_edge;
|
||||
marked_edge[first_edge] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
vector<bool>().swap(marked_edge);
|
||||
return queue_head - nedges;
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
int MPHTable::Mapping(TriGraph* graph, Queue* queue) {
|
||||
int cycles = 0;
|
||||
graph->Reset(m, n);
|
||||
for (ForwardIterator it = begin_; it != end_; ++it) {
|
||||
cmph_uint32 hash_values[3];
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
hash_values[i] = hasher_(*it);
|
||||
}
|
||||
cmph_uint32 v0 = hash_values[0] % bdz->r;
|
||||
cmph_uint32 v1 = hash_values[1] % bdz->r + bdz->r;
|
||||
cmph_uint32 v2 = hash_values[2] % bdz->r + (bdz->r << 1);
|
||||
graph->AddEdge(Edge(v0, v1, v2));
|
||||
}
|
||||
cycles = GenerateQueue(bdz->m, bdz->n, queue, graph);
|
||||
int cycles = queue_head - nedges;
|
||||
if (cycles == 0) queue.swap(*queue_output);
|
||||
return cycles == 0;
|
||||
}
|
||||
|
||||
void MPHTable::Assigning(TriGraph* graph, Queue* queue) {
|
||||
}
|
||||
void MPHTable::Ranking(TriGraph* graph, Queue* queue) {
|
||||
}
|
||||
cmph_uint32 MPHTable::Search(const key_type& key) {
|
||||
template <class Key, class HashFcn>
|
||||
template <class ForwardIterator>
|
||||
bool MPHTable<Key, HashFcn>::Mapping(
|
||||
ForwardIterator begin, ForwardIterator end,
|
||||
vector<Edge>* edges, vector<cmph_uint32> queue) {
|
||||
int cycles = 0;
|
||||
TriGraph graph(m, n);
|
||||
for (ForwardIterator it = begin; it != end; ++it) {
|
||||
cmph_uint32 h[3];
|
||||
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
|
||||
cmph_uint32 v0 = h[0] % r_;
|
||||
cmph_uint32 v1 = h[1] % r_ + r_;
|
||||
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
|
||||
graph.AddEdge(Edge(v0, v1, v2));
|
||||
}
|
||||
if (GenerateQueue(&graph, queue)) {
|
||||
graph.ExtractEdgesAndClear(edges);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
cmph_uint32 MPHTable::Rank(const key_type& key) {
|
||||
template <class Key, class HashFcn>
|
||||
void MPHTable<Key, HashFcn>::Assigning(
|
||||
const vector<Edge>& edges, const vector<cmph_uint32>& queue) {
|
||||
cmph_uint32 nedges = n_;
|
||||
cmph_uint32 current_edge = 0;
|
||||
vector<bool> marked_vertices(nedges + 1);
|
||||
// TODO(davi) use half nibbles instead
|
||||
// vector<cmph_uint8> g(static_cast<cmph_uint32>(ceil(nedges / 4.0)),
|
||||
// std::numerical_limits<cmph_uint8>::max());
|
||||
static const cmph_uint8 kUnassigned = 3;
|
||||
vector<cmph_uint8>(nedges, kUnassigned).swap(g_);
|
||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||
current_edge = queue[i];
|
||||
const TriGraph::Edge& e = edges[current_edge];
|
||||
if (!marked_vertices[e[0]]) {
|
||||
if (!marked_vertices[e[1]]) {
|
||||
g_[e[1]] = kUnassigned;
|
||||
marked_vertices[e[1]] = true;
|
||||
}
|
||||
if (!marked_vertices[e[2]]) {
|
||||
g_[e[2]] = kUnassigned;
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
g_[e[0]] = (6 - g_[e[1]] + g_[e2]) % 3;
|
||||
marked_vertices[e[0]] = true;
|
||||
} else if (!marked_vertices[e[1]])) {
|
||||
if (!marked_vertices[e[2]])) {
|
||||
g_[e[2]] = kUnassigned;
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
g_[e[1]] = 7 - (g_[e[0]] + g_[e[2]]) % 3;
|
||||
marked_vertices[e[1]] = true;
|
||||
} else {
|
||||
g_[e[2]] = (8 - g_[e[0]] + g_[e[1]]) % 3;
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// table used for looking up the number of assigned vertices to a 8-bit integer
|
||||
static cmph_uint8 kBdzLookupTable[] =
|
||||
{
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
|
||||
2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
|
||||
};
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
void MPHTable<Key, HashFcn>::Ranking() {
|
||||
cmph_uint32 nbytes_total = static_cast<cmph_uint32>(ceil(st->n / 4.0));
|
||||
cmph_uint32 size = k_ >> 2U;
|
||||
ranktablesize = static_cast<cmph_uint32>(ceil(n_ / static_cast<double>(k_)));
|
||||
// TODO(davi) Change swap of member classes for resize + memset to avoid fragmentation
|
||||
vector<cmph_uint32> (ranktablesize).swap(ranktable_);;
|
||||
cmph_uint32 offset = 0;
|
||||
cmph_uint32 count = 0;
|
||||
cmph_uint32 i = 0;
|
||||
while (1) {
|
||||
if (i == ranktable.size()) break;
|
||||
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total;
|
||||
for (j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
||||
ranktable_[i] = count;
|
||||
offset += nbytes;
|
||||
nbytes_total -= size;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
cmph_uint32 MPHTable<Key, HashFcn>::Search(const key_type& key) const {
|
||||
cmph_uint32 vertex;
|
||||
cmph_uint32 h[3];
|
||||
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
|
||||
h[0] = h[0] % st->r;
|
||||
h[1] = h[1] % st->r + st->r;
|
||||
h[2] = h[2] % st->r + (st->r << 1);
|
||||
cmph_uint32 vertex = h[(h[g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3];
|
||||
return Rank(st->b, st->ranktable, vertex);
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
cmph_uint32 MPHTable<Key, HashFcn>::Rank(cmph_uint32 vertex) const {
|
||||
cmph_uint32 index = vertex >> b_;
|
||||
cmph_uint32 base_rank = ranktable_[index];
|
||||
cmph_uint32 beg_idx_v = index << b;
|
||||
cmph_uint32 beg_idx_b = index >> 2
|
||||
cmph_uint32 end_idx_b = index >> 2
|
||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
while (beg_idx_v < vertex) {
|
||||
if (g_[beg_idx_v) != kUnassigned) ++base_rank;
|
||||
++beg_idx_v;
|
||||
}
|
||||
return base_rank;
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn>
|
||||
cmph_uint32 MPHTable<Key, HashFcn>::index(const key_type& key) const {
|
||||
return Search(key);
|
||||
}
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -1,15 +1,22 @@
|
||||
#ifndef __CXXMPH_MPHTABLE_H__
|
||||
#define __CXXMPH_MPHTABLE_H__
|
||||
|
||||
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "randomly_seeded_hash.h"
|
||||
#include "stringpiece.h"
|
||||
#include "trigraph.h"
|
||||
|
||||
template <class Key, class NewRandomlySeededHashFcn = __gnu_cxx::hash<Key> >
|
||||
namespace cxxmph {
|
||||
|
||||
template <class Key, class NewRandomlySeededHashFcn = RandomlySeededMurmur2>
|
||||
class MPHTable {
|
||||
public:
|
||||
typedef Key key_type;
|
||||
typedef NewRandomlySeededHashFcn hasher;
|
||||
MPHTable();
|
||||
MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
|
||||
~MPHTable();
|
||||
|
||||
template <class ForwardIterator>
|
||||
@ -17,28 +24,38 @@ class MPHTable {
|
||||
cmph_uint32 index(const key_type& x) const;
|
||||
|
||||
private:
|
||||
typedef std::vector<cmph_uint32> Queue;
|
||||
template<class ForwardIterator>
|
||||
struct TableBuilderState {
|
||||
ForwardIterator begin;
|
||||
ForwardIterator end;
|
||||
Queue edges_queue;
|
||||
TriGraph graph_builder;
|
||||
double c;
|
||||
cmph_uint32 m;
|
||||
cmph_uint32 n;
|
||||
cmph_uint32 k;
|
||||
cmph_uint32 ranktablesize;
|
||||
};
|
||||
int GenerateQueue(
|
||||
cmph_uint32 nedges, cmph_uint32 nvertices,
|
||||
TriGraph* graph, Queue* queue);
|
||||
void Assigning(TriGraph* graph, Queue* queue);
|
||||
void Ranking(TriGraph* graph, Queue* queue);
|
||||
cmph_uint32 Search(const StringPiece& key);
|
||||
cmph_uint32 Rank(const StringPiece& key);
|
||||
template <class ForwardIterator>
|
||||
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
||||
vector<Edge>* edges, vector<cmph_uint32> queue);
|
||||
bool GenerateQueue(TriGraph* graph, vector<cmph_uint32>* queue);
|
||||
void Assigning(TriGraph* graph_builder, Queue* queue);
|
||||
void Ranking(TriGraph* graph_builder, Queue* queue);
|
||||
cmph_uint32 Search(const StringPiece& key);
|
||||
cmph_uint32 Rank(const StringPiece& key);
|
||||
|
||||
std::vector<ConnectedEdge> graph_;
|
||||
// Algorithm parameters
|
||||
cmph_uint8 b_; // Number of bits of the kth index in the ranktable
|
||||
double c_; // Number of bits per key (? is it right)
|
||||
|
||||
// Values used during generation
|
||||
cmph_uint32 m_; // edges count
|
||||
cmph_uint32 n_; // vertex count
|
||||
cmph_uint32 k_ // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
||||
|
||||
// Values used during search
|
||||
|
||||
// Partition vertex count, derived from c parameter.
|
||||
cmph_uint32 r_;
|
||||
// The array containing the minimal perfect hash function graph.
|
||||
std::vector<cmph_uint8> g_;
|
||||
// The table used for the rank step of the minimal perfect hash function
|
||||
std::vector<cmph_uint32> ranktable_;
|
||||
// The selected hash function triplet for finding the edges in the minimal
|
||||
// perfect hash function graph.
|
||||
hasher hash_function_[3];
|
||||
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#define // __CXXMPH_MPHTABLE_H__
|
||||
|
22
cxxmph/mphtable_test.cc
Normal file
22
cxxmph/mphtable_test.cc
Normal file
@ -0,0 +1,22 @@
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
#include "mphtable.h"
|
||||
|
||||
using std::vector;
|
||||
using cxxmph::MPHTable;
|
||||
|
||||
// Builds an MPHTable over three integer keys and checks that index() maps them
// onto the ids 0, 1 and 2, each used exactly once.
int main(int argc, char** argv) {
  vector<int> keys;
  keys.push_back(10);
  keys.push_back(4);
  keys.push_back(3);

  MPHTable<int> mphtable;
  // Reset() does the real work, so it must not live inside assert(): with
  // NDEBUG defined the whole assert expression is removed.
  const bool built = mphtable.Reset(keys.begin(), keys.end());
  assert(built);
  if (!built) return 1;

  vector<int> ids;
  for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
    ids.push_back(mphtable.index(keys[i]));
  }
  // NOTE(review): unqualified sort() with no <algorithm> include in this file;
  // presumably found through another header and argument-dependent lookup.
  sort(ids.begin(), ids.end());
  for (vector<int>::size_type i = 0; i < ids.size(); ++i) {
    assert(ids[i] == static_cast<int>(i));
  }
  return 0;
}
|
||||
|
24
cxxmph/randomly_seeded_hash.h
Normal file
24
cxxmph/randomly_seeded_hash.h
Normal file
@ -0,0 +1,24 @@
|
||||
#ifndef __CXXMPH_RANDOMLY_SEEDED_HASH__
|
||||
#define __CXXMPH_RANDOMLY_SEEDED_HASH__
|
||||
|
||||
// Helper to create randomly seeded hash functions out of existing hash
|
||||
// functions that take a seed as a parameter.
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "../src/cmph_types.h"
|
||||
#include "MurmurHash2.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
struct RandomlySeededMurmur2 {
|
||||
RandomlySeededHashFunction() : seed(random()) { }
|
||||
cmph_uint32 operator()(const StringPiece& key) {
|
||||
return MurmurHash2(key.data(), key.length(), seed);
|
||||
}
|
||||
cmph_uint32 seed;
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_RANDOMLY_SEEDED_HASH__
|
177
cxxmph/stringpiece.h
Normal file
177
cxxmph/stringpiece.h
Normal file
@ -0,0 +1,177 @@
|
||||
// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// A string-like object that points to a sized piece of memory.
|
||||
//
|
||||
// Functions or methods may use const StringPiece& parameters to accept either
|
||||
// a "const char*" or a "string" value that will be implicitly converted to
|
||||
// a StringPiece. The implicit conversion means that it is often appropriate
|
||||
// to include this .h file in other files rather than forward-declaring
|
||||
// StringPiece as would be appropriate for most other Google classes.
|
||||
//
|
||||
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
|
||||
// conversions from "const char*" to "string" and back again.
|
||||
//
|
||||
//
|
||||
// Arghh! I wish C++ literals were "string".
|
||||
|
||||
#ifndef CXXMPH_STRINGPIECE_H__
|
||||
#define CXXMPH_STRINGPIECE_H__
|
||||
|
||||
#include <stddef.h>
#include <string.h>

#include <algorithm>
#include <iosfwd>
#include <iterator>
#include <string>
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// A non-owning view of `length_` bytes starting at `ptr_`. The viewed memory
// is never copied or freed by this class, so it must outlive the StringPiece.
// A default-constructed or cleared piece has a NULL pointer and length 0; the
// comparison and conversion helpers below never hand that NULL pointer to
// memcmp() or to the std::string constructor.
class StringPiece {
 private:
  const char* ptr_;
  int length_;

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.
  StringPiece() : ptr_(NULL), length_(0) { }
  StringPiece(const char* str)
      : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
  StringPiece(const std::string& str)
      : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  int length() const { return length_; }
  bool empty() const { return length_ == 0; }

  void clear() { ptr_ = NULL; length_ = 0; }
  void set(const char* data, int len) { ptr_ = data; length_ = len; }
  void set(const char* str) {
    ptr_ = str;
    if (str != NULL)
      length_ = static_cast<int>(strlen(str));
    else
      length_ = 0;
  }
  void set(const void* data, int len) {
    ptr_ = reinterpret_cast<const char*>(data);
    length_ = len;
  }

  // No bounds checking: i must be in [0, length()).
  char operator[](int i) const { return ptr_[i]; }

  // Drops the first n bytes of the view. No bounds checking.
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drops the last n bytes of the view. No bounds checking.
  void remove_suffix(int n) {
    length_ -= n;
  }

  // Three-way bytewise comparison. Returns a negative value, zero or a
  // positive value when *this sorts before, equal to or after x. When one
  // piece is a prefix of the other, the shorter one sorts first.
  int compare(const StringPiece& x) const {
    const int common = std::min(length_, x.length_);
    // Skip memcmp() when there is nothing to compare; either pointer may be
    // NULL in that case.
    int r = (common > 0) ? memcmp(ptr_, x.ptr_, common) : 0;
    if (r == 0) {
      if (length_ < x.length_) r = -1;
      else if (length_ > x.length_) r = +1;
    }
    return r;
  }

  // Returns a std::string holding a copy of the viewed bytes.
  std::string as_string() const {
    return empty() ? std::string() : std::string(data(), size());
  }
  // We also define ToString() here, since many other string-like
  // interfaces name the routine that converts to a C++ string
  // "ToString", and it's confusing to have the method that does that
  // for a StringPiece be called "as_string()".  We also leave the
  // "as_string()" method defined here for existing code.
  std::string ToString() const {
    return as_string();
  }

  void CopyToString(std::string* target) const;
  void AppendToString(std::string* target) const;

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    return ((length_ >= x.length_) &&
            (x.length_ == 0 || memcmp(ptr_, x.ptr_, x.length_) == 0));
  }

  // Does "this" end with "x"
  bool ends_with(const StringPiece& x) const {
    return ((length_ >= x.length_) &&
            (x.length_ == 0 ||
             memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0));
  }

  // standard STL container boilerplate
  typedef char value_type;
  typedef const char* pointer;
  typedef const char& reference;
  typedef const char& const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  static const size_type npos;
  typedef const char* const_iterator;
  typedef const char* iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;
  iterator begin() const { return ptr_; }
  iterator end() const { return ptr_ + length_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(ptr_ + length_);
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(ptr_);
  }
  // STLS says return size_type, but Google says return int
  int max_size() const { return length_; }
  int capacity() const { return length_; }

  int copy(char* buf, size_type n, size_type pos = 0) const;

  int find(const StringPiece& s, size_type pos = 0) const;
  int find(char c, size_type pos = 0) const;
  int rfind(const StringPiece& s, size_type pos = npos) const;
  int rfind(char c, size_type pos = npos) const;

  StringPiece substr(size_type pos, size_type n = npos) const;
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
bool operator==(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y);
|
||||
|
||||
// Relational operators for StringPiece. Ordering is bytewise; when one piece
// is a prefix of the other, the shorter one sorts first. Everything except
// operator< is derived from operator== and operator<.

inline bool operator!=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return !(x == y);
}

inline bool operator<(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  const int common = std::min(x.size(), y.size());
  // Skip memcmp() when there is nothing to compare: data() of an empty piece
  // may be NULL.
  const int r = (common > 0) ? memcmp(x.data(), y.data(), common) : 0;
  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
}

inline bool operator>(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return y < x;
}

inline bool operator<=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return !(x > y);
}

inline bool operator>=(const cxxmph::StringPiece& x, const cxxmph::StringPiece& y) {
  return !(x < y);
}
|
||||
|
||||
// allow StringPiece to be logged
|
||||
extern std::ostream& operator<<(std::ostream& o, const cxxmph::StringPiece& piece);
|
||||
|
||||
#endif // CXXMPH_STRINGPIECE_H__
|
@ -1,3 +1,4 @@
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
#include "trigraph.h"
|
||||
@ -8,17 +9,51 @@ namespace {
|
||||
static const cmph_uint8 kInvalidEdge = std::numeric_limits<cmph_uint8>::max();
|
||||
}
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
// Creates an empty graph with room for `nedges` edges over `nvertices`
// vertices. Every vertex starts with degree 0 and no first edge.
// next_edge_ is sized together with edges_ because AddEdge() writes both at
// the same index.
TriGraph::TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices)
    : nedges_(0),
      edges_(nedges),
      next_edge_(nedges),
      first_edge_(nvertices, kInvalidEdge),
      vertex_degree_(nvertices, 0) { }
|
||||
|
||||
void TriGraph::ExtractEdgesAndClear(vector<ConnectedEdge>* edges) {
|
||||
void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
|
||||
vector<Edge>().swap(next_edge_);
|
||||
vector<cmph_uint32>().swap(first_edge_);
|
||||
vector<cmph_uint8>().swap(vertex_degree_);
|
||||
nedges_ = 0;
|
||||
edges->swap(edges_);
|
||||
}
|
||||
void TriGraph::AddEdge(const Edge& edge) { }
|
||||
void TriGraph::RemoveEdge(cmph_uint32 current_edge) { }
|
||||
void TriGraph::AddEdge(const Edge& edge) {
|
||||
edges_[nedges_] = edge;
|
||||
next_edge_[nedges_] = Edge(
|
||||
first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
|
||||
first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
|
||||
++vertex_degree_[edge[0]];
|
||||
++vertex_degree_[edge[1]];
|
||||
++vertex_degree_[edge[2]];
|
||||
++nedges_;
|
||||
}
|
||||
|
||||
// Unlinks `current_edge` from the edge list of each of its three vertices and
// decrements the degree of each of them.
//
// For every endpoint, the list that starts at first_edge_[vertex] is followed
// through next_edge_ until `current_edge` is found; its predecessor (or the
// list head, when it is the first entry) is then pointed at its successor.
// The edge is expected to be present in all three lists (checked by assert).
void TriGraph::RemoveEdge(cmph_uint32 current_edge) {
  for (int i = 0; i < 3; ++i) {
    const cmph_uint32 vertex = edges_[current_edge][i];
    cmph_uint32 edge1 = first_edge_[vertex];  // edge being inspected
    cmph_uint32 edge2 = kInvalidEdge;         // its predecessor in the list
    cmph_uint32 j = 0;  // position of `vertex` inside the predecessor edge
    while (edge1 != current_edge && edge1 != kInvalidEdge) {
      edge2 = edge1;
      if (edges_[edge1][0] == vertex) j = 0;
      else if (edges_[edge1][1] == vertex) j = 1;
      else j = 2;
      edge1 = next_edge_[edge1][j];
    }
    assert(edge1 != kInvalidEdge);
    if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i];
    else first_edge_[vertex] = next_edge_[edge1][i];
    --vertex_degree_[vertex];
  }
}
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -1,26 +1,43 @@
|
||||
#ifndef __CXXMPH_TRIGRAPH_H__
|
||||
#define __CXXMPH_TRIGRAPH_H__
|
||||
// Build a trigraph using a memory efficient representation.
|
||||
//
|
||||
// Prior knowledge of the number of edges and vertices for the graph is
|
||||
// required. For each vertex, we store how many edges touch it (degree) and the
|
||||
// index of the first edge in the vector of triples representing the edges.
|
||||
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../src/cmph_types.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
class TriGraph {
|
||||
struct Edge {
|
||||
Edge() { }
|
||||
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2);
|
||||
cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; }
|
||||
const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; }
|
||||
cmph_uint32 vertices[3];
|
||||
};
|
||||
struct ConnectedEdge {
|
||||
Edge current;
|
||||
Edge next;
|
||||
};
|
||||
|
||||
TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices);
|
||||
void AddEdge(const Edge& edge);
|
||||
void RemoveEdge(cmph_uint32 current_edge);
|
||||
void ExtractEdgesAndClear(std::vector<ConnectedEdge>* edges);
|
||||
void RemoveEdge(cmph_uint32 edge_id);
|
||||
void ExtractEdgesAndClear(std::vector<Edge>* edges);
|
||||
|
||||
const std::vector<Edge>& edges() const { return edges_; }
|
||||
const std::vector<cmph_uint8>& vertex_degree() const { return vertex_degree_; }
|
||||
const std::vector<cmph_uint32>& first_edge() const { return first_edge_; }
|
||||
|
||||
private:
|
||||
cmph_uint32 nedges_;
|
||||
std::vector<ConnectedEdge> edges_;
|
||||
std::vector<cmph_uint32> first_edge_;
|
||||
std::vector<cmph_uint8> vertex_degree_;
|
||||
cmph_uint32 nedges_; // total number of edges
|
||||
std::vector<Edge> edges_;
|
||||
std::vector<Edge> next_edge_; // for implementing removal
|
||||
std::vector<cmph_uint32> first_edge_; // the first edge for this vertex
|
||||
std::vector<cmph_uint8> vertex_degree_; // number of edges for this vertex
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_TRIGRAPH_H__
|
||||
|
Loading…
Reference in New Issue
Block a user