Dumping cmph_uint32.
This commit is contained in:
parent
2a35666bfa
commit
5b78c02da0
@ -1,9 +1,10 @@
|
||||
noinst_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test
|
||||
check_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test bm_urls
|
||||
noinst_PROGRAMS = bm_urls
|
||||
bin_PROGRAMS = cxxmph
|
||||
lib_LTLIBRARIES = libcxxmph.la
|
||||
include_HEADERS = cmph_hash_map.h mphtable.h MurmurHash2.h trigraph.h cmph_hash_function.h stringpiece.h
|
||||
include_HEADERS = cmph_hash_map.h mphtable.h MurmurHash2.h trigraph.h cxxmph_hash.h stringpiece.h
|
||||
|
||||
libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mphtable.h mphtable.cc cmph_hash_function.h stringpiece.h
|
||||
# Sources of the libcxxmph libtool library. Header names must match the files
# on disk (trigraph.h, as listed in include_HEADERS).
libcxxmph_la_SOURCES = MurmurHash2.h trigraph.h trigraph.cc mphtable.h mphtable.cc cxxmph_hash.h stringpiece.h
|
||||
libcxxmph_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
cmph_hash_map_test_LDADD = libcxxmph.la
|
||||
@ -15,5 +16,8 @@ mphtable_test_SOURCES = mphtable_test.cc
|
||||
trigraph_test_LDADD = libcxxmph.la
|
||||
trigraph_test_SOURCES = trigraph_test.cc
|
||||
|
||||
bm_urls_LDADD = libcxxmph.la
|
||||
bm_urls_SOURCES = bm_urls.cc
|
||||
|
||||
cxxmph_LDADD = libcxxmph.la
|
||||
cxxmph_SOURCES = cxxmph.cc
|
||||
|
@ -15,7 +15,7 @@
|
||||
// 2. It will not produce the same results on little-endian and big-endian
|
||||
// machines.
|
||||
|
||||
namespace {
|
||||
namespace cxxmph {
|
||||
|
||||
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
|
||||
{
|
||||
@ -68,6 +68,6 @@ unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
|
||||
return h;
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace cxxmph
|
||||
|
||||
#endif // __CXXMPH_MURMUR_HASH2__
|
||||
|
21
cxxmph/bm_urls.cc
Normal file
21
cxxmph/bm_urls.cc
Normal file
@ -0,0 +1,21 @@
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include "mphtable.h"
|
||||
|
||||
using std::ifstream;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using cxxmph::SimpleMPHTable;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
vector<string> urls;
|
||||
std::ifstream f("URLS1k");
|
||||
string buffer;
|
||||
while(std::getline(f, buffer)) urls.push_back(buffer);
|
||||
|
||||
SimpleMPHTable<string> table;
|
||||
table.Reset(urls.begin(), urls.end());
|
||||
}
|
@ -70,9 +70,9 @@ class cmph_hash_map {
|
||||
|
||||
void rehash();
|
||||
std::vector<value_type> values_;
|
||||
SimpleMPHTable<Key, typename OptimizedSeededHashFunction<HashFcn>::hash_function> table_;
|
||||
SimpleMPHTable<Key, typename cxxmph_hash<HashFcn>::hash_function> table_;
|
||||
// TODO(davi) optimize slack to not hold a copy of the key
|
||||
typedef typename std::unordered_map<Key, cmph_uint32, HashFcn, EqualKey, Alloc> slack_type;
|
||||
typedef typename std::unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
|
||||
slack_type slack_;
|
||||
};
|
||||
|
||||
|
@ -1,29 +1,30 @@
|
||||
#include <stdint.h> // for uint32_t and friends
|
||||
|
||||
#include <cstdlib>
|
||||
#include <unordered_map> // for std::hash
|
||||
|
||||
#include "MurmurHash2.h"
|
||||
#include "stringpiece.h"
|
||||
#include "cmph_types.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
template <class HashFcn>
|
||||
struct seeded_hash_function {
|
||||
template <class Key>
|
||||
cmph_uint32 operator()(const Key& k, cmph_uint32 seed) const {
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
return HashFcn()(k) ^ seed;
|
||||
}
|
||||
};
|
||||
|
||||
struct Murmur2 {
|
||||
template<class Key>
|
||||
cmph_uint32 operator()(const Key& k) const {
|
||||
uint32_t operator()(const Key& k) const {
|
||||
return MurmurHash2(k, sizeof(Key), 1 /* seed */);
|
||||
}
|
||||
};
|
||||
struct Murmur2StringPiece {
|
||||
template <class Key>
|
||||
cmph_uint32 operator()(const Key& k) const {
|
||||
uint32_t operator()(const Key& k) const {
|
||||
StringPiece s(k);
|
||||
return MurmurHash2(s.data(), s.length(), 1 /* seed */);
|
||||
}
|
||||
@ -32,7 +33,7 @@ struct Murmur2StringPiece {
|
||||
template <>
|
||||
struct seeded_hash_function<Murmur2> {
|
||||
template <class Key>
|
||||
cmph_uint32 operator()(const Key& k, cmph_uint32 seed) const {
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), seed);
|
||||
}
|
||||
};
|
||||
@ -40,42 +41,42 @@ struct seeded_hash_function<Murmur2> {
|
||||
template <>
|
||||
struct seeded_hash_function<Murmur2StringPiece> {
|
||||
template <class Key>
|
||||
cmph_uint32 operator()(const Key& k, cmph_uint32 seed) const {
|
||||
uint32_t operator()(const Key& k, uint32_t seed) const {
|
||||
StringPiece s(k);
|
||||
return MurmurHash2(s.data(), s.length(), seed);
|
||||
}
|
||||
};
|
||||
|
||||
template <class HashFcn> struct OptimizedSeededHashFunction
|
||||
template <class HashFcn> struct cxxmph_hash
|
||||
{ typedef seeded_hash_function<HashFcn> hash_function; };
|
||||
// Use Murmur2 instead for all types defined in std::hash, plus
|
||||
// std::string which is commonly extended.
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<char*> >
|
||||
template <> struct cxxmph_hash<std::hash<char*> >
|
||||
{ typedef seeded_hash_function<Murmur2StringPiece> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<const char*> >
|
||||
template <> struct cxxmph_hash<std::hash<const char*> >
|
||||
{ typedef seeded_hash_function<Murmur2StringPiece> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<std::string> >
|
||||
template <> struct cxxmph_hash<std::hash<std::string> >
|
||||
{ typedef seeded_hash_function<Murmur2StringPiece> hash_function; };
|
||||
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<char> >
|
||||
template <> struct cxxmph_hash<std::hash<char> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<unsigned char> >
|
||||
template <> struct cxxmph_hash<std::hash<unsigned char> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<short> >
|
||||
template <> struct cxxmph_hash<std::hash<short> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<unsigned short> >
|
||||
template <> struct cxxmph_hash<std::hash<unsigned short> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<int> >
|
||||
template <> struct cxxmph_hash<std::hash<int> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<unsigned int> >
|
||||
template <> struct cxxmph_hash<std::hash<unsigned int> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<long> >
|
||||
template <> struct cxxmph_hash<std::hash<long> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<unsigned long> >
|
||||
template <> struct cxxmph_hash<std::hash<unsigned long> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<long long> >
|
||||
template <> struct cxxmph_hash<std::hash<long long> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
template <> struct OptimizedSeededHashFunction<std::hash<unsigned long long> >
|
||||
template <> struct cxxmph_hash<std::hash<unsigned long long> >
|
||||
{ typedef seeded_hash_function<Murmur2> hash_function; };
|
||||
|
||||
} // namespace cxxmph
|
@ -10,9 +10,9 @@ using std::vector;
|
||||
|
||||
namespace {
|
||||
|
||||
static const cmph_uint8 kUnassigned = 3;
|
||||
static const uint8_t kUnassigned = 3;
|
||||
// Lookup table giving, for each possible byte value, the number of assigned vertices among its four 2-bit fields
|
||||
static cmph_uint8 kBdzLookupTable[] =
|
||||
static uint8_t kBdzLookupTable[] =
|
||||
{
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
|
||||
@ -36,20 +36,20 @@ static cmph_uint8 kBdzLookupTable[] =
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
const cmph_uint8 MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
const uint8_t MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
|
||||
void MPHTable::clear() {
|
||||
// TODO(davi) implement me
|
||||
}
|
||||
bool MPHTable::GenerateQueue(
|
||||
TriGraph* graph, vector<cmph_uint32>* queue_output) {
|
||||
cmph_uint32 queue_head = 0, queue_tail = 0;
|
||||
cmph_uint32 nedges = m_;
|
||||
cmph_uint32 nvertices = n_;
|
||||
TriGraph* graph, vector<uint32_t>* queue_output) {
|
||||
uint32_t queue_head = 0, queue_tail = 0;
|
||||
uint32_t nedges = m_;
|
||||
uint32_t nvertices = n_;
|
||||
// Relies on vector<bool> using 1 bit per element
|
||||
vector<bool> marked_edge(nedges + 1, false);
|
||||
vector<cmph_uint32> queue(nvertices, 0);
|
||||
for (cmph_uint32 i = 0; i < nedges; ++i) {
|
||||
vector<uint32_t> queue(nvertices, 0);
|
||||
for (uint32_t i = 0; i < nedges; ++i) {
|
||||
const TriGraph::Edge& e = graph->edges()[i];
|
||||
if (graph->vertex_degree()[e[0]] == 1 ||
|
||||
graph->vertex_degree()[e[1]] == 1 ||
|
||||
@ -62,7 +62,7 @@ bool MPHTable::GenerateQueue(
|
||||
}
|
||||
/*
|
||||
for (unsigned int i = 0; i < marked_edge.size(); ++i) {
|
||||
cerr << "vertex with degree " << static_cast<cmph_uint32>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
|
||||
cerr << "vertex with degree " << static_cast<uint32_t>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
|
||||
}
|
||||
for (unsigned int i = 0; i < queue.size(); ++i) {
|
||||
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
||||
@ -73,13 +73,13 @@ bool MPHTable::GenerateQueue(
|
||||
// cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
|
||||
// graph->DebugGraph();
|
||||
while (queue_tail != queue_head) {
|
||||
cmph_uint32 current_edge = queue[queue_tail++];
|
||||
uint32_t current_edge = queue[queue_tail++];
|
||||
graph->RemoveEdge(current_edge);
|
||||
const TriGraph::Edge& e = graph->edges()[current_edge];
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
cmph_uint32 v = e[i];
|
||||
uint32_t v = e[i];
|
||||
if (graph->vertex_degree()[v] == 1) {
|
||||
cmph_uint32 first_edge = graph->first_edge()[v];
|
||||
uint32_t first_edge = graph->first_edge()[v];
|
||||
if (!marked_edge[first_edge]) {
|
||||
queue[queue_head++] = first_edge;
|
||||
marked_edge[first_edge] = true;
|
||||
@ -98,14 +98,14 @@ bool MPHTable::GenerateQueue(
|
||||
}
|
||||
|
||||
void MPHTable::Assigning(
|
||||
const vector<TriGraph::Edge>& edges, const vector<cmph_uint32>& queue) {
|
||||
cmph_uint32 current_edge = 0;
|
||||
const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
|
||||
uint32_t current_edge = 0;
|
||||
vector<bool> marked_vertices(n_ + 1);
|
||||
// Initialize vector of half nibbles with all bits set.
|
||||
cmph_uint32 sizeg = static_cast<cmph_uint32>(ceil(n_/4.0));
|
||||
vector<cmph_uint8>(sizeg, std::numeric_limits<cmph_uint8>::max()).swap(g_);
|
||||
uint32_t sizeg = static_cast<uint32_t>(ceil(n_/4.0));
|
||||
vector<uint8_t>(sizeg, std::numeric_limits<uint8_t>::max()).swap(g_);
|
||||
|
||||
cmph_uint32 nedges = m_; // for legibility
|
||||
uint32_t nedges = m_; // for legibility
|
||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||
current_edge = queue[i];
|
||||
const TriGraph::Edge& e = edges[current_edge];
|
||||
@ -144,20 +144,20 @@ void MPHTable::Assigning(
|
||||
}
|
||||
|
||||
void MPHTable::Ranking() {
|
||||
cmph_uint32 nbytes_total = static_cast<cmph_uint32>(ceil(n_ / 4.0));
|
||||
cmph_uint32 size = k_ >> 2U;
|
||||
cmph_uint32 ranktablesize = static_cast<cmph_uint32>(
|
||||
uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
|
||||
uint32_t size = k_ >> 2U;
|
||||
uint32_t ranktablesize = static_cast<uint32_t>(
|
||||
ceil(n_ / static_cast<double>(k_)));
|
||||
// TODO(davi) Change swap of member classes for resize + memset to avoid
|
||||
// fragmentation
|
||||
vector<cmph_uint32> (ranktablesize).swap(ranktable_);;
|
||||
cmph_uint32 offset = 0;
|
||||
cmph_uint32 count = 0;
|
||||
cmph_uint32 i = 1;
|
||||
vector<uint32_t> (ranktablesize).swap(ranktable_);;
|
||||
uint32_t offset = 0;
|
||||
uint32_t count = 0;
|
||||
uint32_t i = 1;
|
||||
while (1) {
|
||||
if (i == ranktable_.size()) break;
|
||||
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total;
|
||||
for (cmph_uint32 j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
||||
uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
|
||||
for (uint32_t j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
|
||||
ranktable_[i] = count;
|
||||
offset += nbytes;
|
||||
nbytes_total -= size;
|
||||
@ -165,12 +165,12 @@ void MPHTable::Ranking() {
|
||||
}
|
||||
}
|
||||
|
||||
cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const {
|
||||
cmph_uint32 index = vertex >> b_;
|
||||
cmph_uint32 base_rank = ranktable_[index];
|
||||
cmph_uint32 beg_idx_v = index << b_;
|
||||
cmph_uint32 beg_idx_b = beg_idx_v >> 2;
|
||||
cmph_uint32 end_idx_b = vertex >> 2;
|
||||
uint32_t MPHTable::Rank(uint32_t vertex) const {
|
||||
uint32_t index = vertex >> b_;
|
||||
uint32_t base_rank = ranktable_[index];
|
||||
uint32_t beg_idx_v = index << b_;
|
||||
uint32_t beg_idx_b = beg_idx_v >> 2;
|
||||
uint32_t end_idx_b = vertex >> 2;
|
||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
// cerr << "beg_idx_v: " << beg_idx_v << endl;
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
// Minimal perfect hash abstraction implementing the BDZ algorithm
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <unordered_map> // for std::hash
|
||||
@ -13,61 +15,61 @@
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
#include "cmph_hash_function.h"
|
||||
#include "cxxmph_hash.h"
|
||||
#include "trigraph.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
class MPHTable {
|
||||
public:
|
||||
MPHTable(double c = 1.23, cmph_uint8 b = 7) :
|
||||
MPHTable(double c = 1.23, uint8_t b = 7) :
|
||||
c_(c), b_(b), m_(0), n_(0), k_(0), r_(0) { }
|
||||
~MPHTable() {}
|
||||
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end);
|
||||
template <class SeededHashFcn, class Key> // must agree with Reset
|
||||
cmph_uint32 index(const Key& x) const;
|
||||
cmph_uint32 size() const { return m_; }
|
||||
uint32_t index(const Key& x) const;
|
||||
uint32_t size() const { return m_; }
|
||||
void clear();
|
||||
|
||||
private:
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool Mapping(ForwardIterator begin, ForwardIterator end,
|
||||
std::vector<TriGraph::Edge>* edges,
|
||||
std::vector<cmph_uint32>* queue);
|
||||
bool GenerateQueue(TriGraph* graph, std::vector<cmph_uint32>* queue);
|
||||
std::vector<uint32_t>* queue);
|
||||
bool GenerateQueue(TriGraph* graph, std::vector<uint32_t>* queue);
|
||||
void Assigning(const std::vector<TriGraph::Edge>& edges,
|
||||
const std::vector<cmph_uint32>& queue);
|
||||
const std::vector<uint32_t>& queue);
|
||||
void Ranking();
|
||||
cmph_uint32 Rank(cmph_uint32 vertex) const;
|
||||
uint32_t Rank(uint32_t vertex) const;
|
||||
|
||||
// Algorithm parameters
|
||||
double c_; // Number of bits per key (? is it right)
|
||||
cmph_uint8 b_; // Number of bits of the kth index in the ranktable
|
||||
uint8_t b_; // Number of bits of the kth index in the ranktable
|
||||
|
||||
// Values used during generation
|
||||
cmph_uint32 m_; // edges count
|
||||
cmph_uint32 n_; // vertex count
|
||||
cmph_uint32 k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
||||
uint32_t m_; // edges count
|
||||
uint32_t n_; // vertex count
|
||||
uint32_t k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
|
||||
|
||||
// Values used during search
|
||||
|
||||
// Partition vertex count, derived from c parameter.
|
||||
cmph_uint32 r_;
|
||||
uint32_t r_;
|
||||
// The array containing the minimal perfect hash function graph.
|
||||
std::vector<cmph_uint8> g_;
|
||||
std::vector<uint8_t> g_;
|
||||
// The table used for the rank step of the minimal perfect hash function
|
||||
std::vector<cmph_uint32> ranktable_;
|
||||
std::vector<uint32_t> ranktable_;
|
||||
// The selected hash seed triplet for finding the edges in the minimal
|
||||
// perfect hash function graph.
|
||||
cmph_uint32 hash_seed_[3];
|
||||
uint32_t hash_seed_[3];
|
||||
|
||||
static const cmph_uint8 valuemask[];
|
||||
static void set_2bit_value(std::vector<cmph_uint8> *d, cmph_uint32 i, cmph_uint8 v) {
|
||||
static const uint8_t valuemask[];
|
||||
static void set_2bit_value(std::vector<uint8_t> *d, uint32_t i, uint8_t v) {
|
||||
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
|
||||
}
|
||||
static cmph_uint32 get_2bit_value(const std::vector<cmph_uint8>& d, cmph_uint32 i) {
|
||||
static uint32_t get_2bit_value(const std::vector<uint8_t>& d, uint32_t i) {
|
||||
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
|
||||
}
|
||||
|
||||
@ -78,7 +80,7 @@ class MPHTable {
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
m_ = end - begin;
|
||||
r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3));
|
||||
r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
|
||||
if ((r_ % 2) == 0) r_ += 1;
|
||||
n_ = 3*r_;
|
||||
k_ = 1U << b_;
|
||||
@ -87,7 +89,7 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
|
||||
int iterations = 10;
|
||||
std::vector<TriGraph::Edge> edges;
|
||||
std::vector<cmph_uint32> queue;
|
||||
std::vector<uint32_t> queue;
|
||||
while (1) {
|
||||
cerr << "Iterations missing: " << iterations << endl;
|
||||
for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
|
||||
@ -106,14 +108,14 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
template <class SeededHashFcn, class ForwardIterator>
|
||||
bool MPHTable::Mapping(
|
||||
ForwardIterator begin, ForwardIterator end,
|
||||
std::vector<TriGraph::Edge>* edges, std::vector<cmph_uint32>* queue) {
|
||||
std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
|
||||
TriGraph graph(n_, m_);
|
||||
for (ForwardIterator it = begin; it != end; ++it) {
|
||||
cmph_uint32 h[3];
|
||||
uint32_t h[3];
|
||||
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
|
||||
cmph_uint32 v0 = h[0] % r_;
|
||||
cmph_uint32 v1 = h[1] % r_ + r_;
|
||||
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
|
||||
uint32_t v0 = h[0] % r_;
|
||||
uint32_t v1 = h[1] % r_ + r_;
|
||||
uint32_t v2 = h[2] % r_ + (r_ << 1);
|
||||
// cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
||||
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
||||
}
|
||||
@ -125,8 +127,8 @@ bool MPHTable::Mapping(
|
||||
}
|
||||
|
||||
template <class SeededHashFcn, class Key>
|
||||
cmph_uint32 MPHTable::index(const Key& key) const {
|
||||
cmph_uint32 h[3];
|
||||
uint32_t MPHTable::index(const Key& key) const {
|
||||
uint32_t h[3];
|
||||
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
|
||||
h[0] = h[0] % r_;
|
||||
h[1] = h[1] % r_ + r_;
|
||||
@ -136,19 +138,19 @@ cmph_uint32 MPHTable::index(const Key& key) const {
|
||||
assert((h[0] >> 2) <g_.size());
|
||||
assert((h[1] >> 2) <g_.size());
|
||||
assert((h[2] >> 2) <g_.size());
|
||||
cmph_uint32 vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
|
||||
uint32_t vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
|
||||
// cerr << "Search found vertex " << vertex << endl;
|
||||
return Rank(vertex);
|
||||
}
|
||||
|
||||
template <class Key, class HashFcn = typename OptimizedSeededHashFunction<std::hash<Key> >::hash_function>
|
||||
template <class Key, class HashFcn = typename cxxmph_hash<std::hash<Key> >::hash_function>
|
||||
class SimpleMPHTable : public MPHTable {
|
||||
public:
|
||||
template <class ForwardIterator>
|
||||
bool Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
return MPHTable::Reset<HashFcn>(begin, end);
|
||||
}
|
||||
cmph_uint32 index(const Key& key) { return MPHTable::index<HashFcn>(key); }
|
||||
uint32_t index(const Key& key) { return MPHTable::index<HashFcn>(key); }
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -9,12 +9,12 @@ using std::endl;
|
||||
using std::vector;
|
||||
|
||||
namespace {
|
||||
static const cmph_uint32 kInvalidEdge = std::numeric_limits<cmph_uint32>::max();
|
||||
static const uint32_t kInvalidEdge = std::numeric_limits<uint32_t>::max();
|
||||
}
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
TriGraph::TriGraph(cmph_uint32 nvertices, cmph_uint32 nedges)
|
||||
TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges)
|
||||
: nedges_(0),
|
||||
edges_(nedges),
|
||||
next_edge_(nedges),
|
||||
@ -23,8 +23,8 @@ TriGraph::TriGraph(cmph_uint32 nvertices, cmph_uint32 nedges)
|
||||
|
||||
void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
|
||||
vector<Edge>().swap(next_edge_);
|
||||
vector<cmph_uint32>().swap(first_edge_);
|
||||
vector<cmph_uint8>().swap(vertex_degree_);
|
||||
vector<uint32_t>().swap(first_edge_);
|
||||
vector<uint8_t>().swap(vertex_degree_);
|
||||
nedges_ = 0;
|
||||
edges->swap(edges_);
|
||||
}
|
||||
@ -45,13 +45,13 @@ void TriGraph::AddEdge(const Edge& edge) {
|
||||
++nedges_;
|
||||
}
|
||||
|
||||
void TriGraph::RemoveEdge(cmph_uint32 current_edge) {
|
||||
void TriGraph::RemoveEdge(uint32_t current_edge) {
|
||||
// cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
cmph_uint32 vertex = edges_[current_edge][i];
|
||||
cmph_uint32 edge1 = first_edge_[vertex];
|
||||
cmph_uint32 edge2 = kInvalidEdge;
|
||||
cmph_uint32 j = 0;
|
||||
uint32_t vertex = edges_[current_edge][i];
|
||||
uint32_t edge1 = first_edge_[vertex];
|
||||
uint32_t edge2 = kInvalidEdge;
|
||||
uint32_t j = 0;
|
||||
while (edge1 != current_edge && edge1 != kInvalidEdge) {
|
||||
edge2 = edge1;
|
||||
if (edges_[edge1][0] == vertex) j = 0;
|
||||
|
@ -6,42 +6,41 @@
|
||||
// required. For each vertex, we store how many edges touch it (degree) and the
|
||||
// index of the first edge in the vector of triples representing the edges.
|
||||
|
||||
#include <stdint.h> // for uint32_t and friends
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "cmph_types.h"
|
||||
|
||||
namespace cxxmph {
|
||||
|
||||
class TriGraph {
|
||||
public:
|
||||
struct Edge {
|
||||
Edge() { }
|
||||
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2) {
|
||||
Edge(uint32_t v0, uint32_t v1, uint32_t v2) {
|
||||
vertices[0] = v0;
|
||||
vertices[1] = v1;
|
||||
vertices[2] = v2;
|
||||
}
|
||||
cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; }
|
||||
const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; }
|
||||
cmph_uint32 vertices[3];
|
||||
uint32_t& operator[](uint8_t v) { return vertices[v]; }
|
||||
const uint32_t& operator[](uint8_t v) const { return vertices[v]; }
|
||||
uint32_t vertices[3];
|
||||
};
|
||||
TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices);
|
||||
TriGraph(uint32_t nedges, uint32_t nvertices);
|
||||
void AddEdge(const Edge& edge);
|
||||
void RemoveEdge(cmph_uint32 edge_id);
|
||||
void RemoveEdge(uint32_t edge_id);
|
||||
void ExtractEdgesAndClear(std::vector<Edge>* edges);
|
||||
void DebugGraph() const;
|
||||
|
||||
const std::vector<Edge>& edges() const { return edges_; }
|
||||
const std::vector<cmph_uint8>& vertex_degree() const { return vertex_degree_; }
|
||||
const std::vector<cmph_uint32>& first_edge() const { return first_edge_; }
|
||||
const std::vector<uint8_t>& vertex_degree() const { return vertex_degree_; }
|
||||
const std::vector<uint32_t>& first_edge() const { return first_edge_; }
|
||||
|
||||
private:
|
||||
cmph_uint32 nedges_; // total number of edges
|
||||
uint32_t nedges_; // total number of edges
|
||||
std::vector<Edge> edges_;
|
||||
std::vector<Edge> next_edge_; // for implementing removal
|
||||
std::vector<cmph_uint32> first_edge_; // the first edge for this vertex
|
||||
std::vector<cmph_uint8> vertex_degree_; // number of edges for this vertex
|
||||
std::vector<uint32_t> first_edge_; // the first edge for this vertex
|
||||
std::vector<uint8_t> vertex_degree_; // number of edges for this vertex
|
||||
};
|
||||
|
||||
} // namespace cxxmph
|
||||
|
@ -1,22 +0,0 @@
|
||||
#include <cassert>
|
||||
|
||||
#include "trigraph.h"
|
||||
|
||||
using cxxmph::TriGraph;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
TriGraph g(4, 2);
|
||||
g.AddEdge(TriGraph::Edge(0, 1, 2));
|
||||
g.AddEdge(TriGraph::Edge(1, 3, 2));
|
||||
assert(g.vertex_degree()[0] == 1);
|
||||
assert(g.vertex_degree()[1] == 2);
|
||||
assert(g.vertex_degree()[2] == 2);
|
||||
assert(g.vertex_degree()[3] == 1);
|
||||
g.RemoveEdge(0);
|
||||
assert(g.vertex_degree()[0] == 0);
|
||||
assert(g.vertex_degree()[1] == 1);
|
||||
assert(g.vertex_degree()[2] == 1);
|
||||
assert(g.vertex_degree()[3] == 1);
|
||||
std::vector<TriGraph::Edge> edges;
|
||||
g.ExtractEdgesAndClear(&edges);
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
bin_PROGRAMS = cmph
|
||||
check_PROGRAMS = cmph_benchmark_test
|
||||
lib_LTLIBRARIES = libcmph.la
|
||||
include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h
|
||||
libcmph_la_SOURCES = hash.h hash.c \
|
||||
@ -30,3 +31,6 @@ libcmph_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
cmph_SOURCES = main.c wingetopt.h wingetopt.c
|
||||
cmph_LDADD = libcmph.la
|
||||
|
||||
cmph_benchmark_test_SOURCES = cmph_benchmark_test.cc
|
||||
cmph_benchmark_test_LDADD = libcmph.la
|
||||
|
@ -1,3 +1,5 @@
|
||||
// A simple benchmark tool around getrusage
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -42,17 +44,19 @@ int timeval_subtract (
|
||||
|
||||
benchmark_t* find_benchmark(const char* name) {
|
||||
benchmark_t* benchmark = global_benchmarks;
|
||||
while (benchmark->name != NULL) if (strcmp(benchmark->name, name) != 0) break;
|
||||
if (!benchmark->name) return NULL;
|
||||
while (benchmark && benchmark->name != NULL) {
|
||||
if (strcmp(benchmark->name, name) == 0) break;
|
||||
++benchmark;
|
||||
}
|
||||
if (!benchmark || !benchmark->name) return NULL;
|
||||
return benchmark;
|
||||
}
|
||||
|
||||
int global_benchmarks_length() {
|
||||
benchmark_t* benchmark;
|
||||
benchmark_t* benchmark = global_benchmarks;
|
||||
int length = 0;
|
||||
if (global_benchmarks == 0) return 0;
|
||||
benchmark = global_benchmarks;
|
||||
while (benchmark->name != NULL) ++length;
|
||||
if (benchmark == NULL) return 0;
|
||||
while (benchmark->name != NULL) ++length, ++benchmark;
|
||||
return length;
|
||||
}
|
||||
|
||||
@ -62,8 +66,11 @@ void bm_register(const char* name, void (*func)(int), int iters) {
|
||||
benchmark.name = name;
|
||||
benchmark.func = func;
|
||||
assert(!find_benchmark(name));
|
||||
global_benchmarks = realloc(global_benchmarks, length + 1);
|
||||
global_benchmarks = realloc(
|
||||
global_benchmarks, (length + 2)*sizeof(benchmark_t));
|
||||
global_benchmarks[length] = benchmark;
|
||||
memset(&benchmark, 0, sizeof(benchmark_t)); // pivot
|
||||
global_benchmarks[length + 1] = benchmark;
|
||||
}
|
||||
|
||||
void bm_start(const char* name) {
|
||||
@ -71,6 +78,7 @@ void bm_start(const char* name) {
|
||||
struct rusage rs;
|
||||
|
||||
benchmark = find_benchmark(name);
|
||||
assert(benchmark);
|
||||
int ret = getrusage(RUSAGE_SELF, &rs);
|
||||
if (ret != 0) {
|
||||
perror("rusage failed");
|
||||
@ -98,6 +106,19 @@ void bm_end(const char* name) {
|
||||
struct timeval stime;
|
||||
timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime);
|
||||
|
||||
printf("User cpu time used: %ld.%6ld\n", utime.tv_sec, utime.tv_usec);
|
||||
printf("System cpu time used: %ld.%6ld\n", stime.tv_sec, stime.tv_usec);
|
||||
printf("Benchmark: %s\n", benchmark->name);
|
||||
printf("User time used : %ld.%6ld\n", utime.tv_sec, utime.tv_usec);
|
||||
printf("System time used: %ld.%6ld\n", stime.tv_sec, stime.tv_usec);
|
||||
printf("Wall time used : %ld.%6ld\n", stime.tv_sec, stime.tv_usec);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void run_benchmarks(int argc, char** argv) {
|
||||
benchmark_t* benchmark = global_benchmarks;
|
||||
while (benchmark && benchmark->name != NULL) {
|
||||
bm_start(benchmark->name);
|
||||
bm_end(benchmark->name);
|
||||
++benchmark;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define BM_REGISTER(func, iters) bm_register(##func, func, iters);
|
||||
#define BM_REGISTER(func, iters) bm_register(#func, func, iters)
|
||||
void bm_register(const char* name, void (*func)(int), int iters);
|
||||
void run_benchmarks(int argc, char** argv);
|
||||
|
||||
|
22
src/cmph_benchmark_test.cc
Normal file
22
src/cmph_benchmark_test.cc
Normal file
@ -0,0 +1,22 @@
|
||||
#include <unistd.h> // for sleep
|
||||
#include <limits.h>
|
||||
|
||||
#include "cmph_benchmark.h"
|
||||
|
||||
// Sample benchmark body: blocks the caller for one second.
// `iters` is accepted to match the benchmark callback signature and is unused.
void bm_sleep(int iters) {
  (void)iters;
  (void)sleep(1);
}
|
||||
|
||||
// Sample benchmark body: adds up every integer in [0, INT_MAX).
//
// The running sum is kept in an unsigned int because it exceeds INT_MAX long
// before the loop ends: overflowing a signed int is undefined behavior, while
// unsigned arithmetic wraps around. `iters` is accepted to match the
// benchmark callback signature and is unused; the loop length is fixed.
void bm_increment(int iters) {
  (void)iters;
  unsigned int v = 0;
  for (int i = 0; i < INT_MAX; ++i) {
    v += static_cast<unsigned int>(i);
  }
  (void)v;  // the sum itself is not consumed
}
|
||||
|
||||
// Registers the two sample benchmarks, one iteration each, then passes the
// command line to the benchmark runner.
int main(int argc, char** argv) {
  BM_REGISTER(bm_sleep, 1);
  BM_REGISTER(bm_increment, 1);
  run_benchmarks(argc, argv);
  return 0;
}
|
||||
|
Loading…
Reference in New Issue
Block a user