Dumping cmph_uint32.

This commit is contained in:
Davi de Castro Reis 2011-02-13 20:40:26 -02:00
parent 2a35666bfa
commit 5b78c02da0
14 changed files with 197 additions and 145 deletions

View File

@ -1,9 +1,10 @@
noinst_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test check_PROGRAMS = cmph_hash_map_test mphtable_test trigraph_test bm_urls
noinst_PROGRAMS = bm_urls
bin_PROGRAMS = cxxmph bin_PROGRAMS = cxxmph
lib_LTLIBRARIES = libcxxmph.la lib_LTLIBRARIES = libcxxmph.la
include_HEADERS = cmph_hash_map.h mphtable.h MurmurHash2.h trigraph.h cmph_hash_function.h stringpiece.h include_HEADERS = cmph_hash_map.h mphtable.h MurmurHash2.h trigraph.h cxxmph_hash.h stringpiece.h
libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mphtable.h mphtable.cc cmph_hash_function.h stringpiece.h libcxxmph_la_SOURCES = MurmurHash2.h trigragh.h trigraph.cc mphtable.h mphtable.cc cxxmph_hash.h stringpiece.h
libcxxmph_la_LDFLAGS = -version-info 0:0:0 libcxxmph_la_LDFLAGS = -version-info 0:0:0
cmph_hash_map_test_LDADD = libcxxmph.la cmph_hash_map_test_LDADD = libcxxmph.la
@ -15,5 +16,8 @@ mphtable_test_SOURCES = mphtable_test.cc
trigraph_test_LDADD = libcxxmph.la trigraph_test_LDADD = libcxxmph.la
trigraph_test_SOURCES = trigraph_test.cc trigraph_test_SOURCES = trigraph_test.cc
bm_urls_LDADD = libcxxmph.la
bm_urls_SOURCES = bm_urls.cc
cxxmph_LDADD = libcxxmph.la cxxmph_LDADD = libcxxmph.la
cxxmph_SOURCES = cxxmph.cc cxxmph_SOURCES = cxxmph.cc

View File

@ -15,7 +15,7 @@
// 2. It will not produce the same results on little-endian and big-endian // 2. It will not produce the same results on little-endian and big-endian
// machines. // machines.
namespace { namespace cxxmph {
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{ {
@ -68,6 +68,6 @@ unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
return h; return h;
} }
} } // namespace cxxmph
#endif // __CXXMPH_MURMUR_HASH2__ #endif // __CXXMPH_MURMUR_HASH2__

21
cxxmph/bm_urls.cc Normal file
View File

@ -0,0 +1,21 @@
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
#include "mphtable.h"
using std::ifstream;
using std::string;
using std::vector;
using cxxmph::SimpleMPHTable;
// Benchmark driver: reads one key per line from the file "URLS1k" in the
// current working directory and builds a minimal perfect hash table over the
// resulting list of strings.
//
// Returns 0 when the table was built, 1 when the input could not be opened,
// contained no lines, or SimpleMPHTable::Reset reported failure.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  std::ifstream f("URLS1k");
  if (!f) {
    std::cerr << "bm_urls: unable to open URLS1k" << '\n';
    return 1;
  }

  vector<string> urls;
  string buffer;
  while (std::getline(f, buffer)) urls.push_back(buffer);

  // MPHTable::Reset sets m_ to the number of keys and then draws its seeds
  // with `random() % m_`; an empty key set would therefore divide by zero.
  if (urls.empty()) {
    std::cerr << "bm_urls: URLS1k contains no keys" << '\n';
    return 1;
  }

  SimpleMPHTable<string> table;
  // NOTE(review): Reset returns bool; a false result is treated as "table not
  // built" here -- confirm against the full body of MPHTable::Reset.
  if (!table.Reset(urls.begin(), urls.end())) {
    std::cerr << "bm_urls: failed to build the perfect hash table" << '\n';
    return 1;
  }
  return 0;
}

View File

@ -70,9 +70,9 @@ class cmph_hash_map {
void rehash(); void rehash();
std::vector<value_type> values_; std::vector<value_type> values_;
SimpleMPHTable<Key, typename OptimizedSeededHashFunction<HashFcn>::hash_function> table_; SimpleMPHTable<Key, typename cxxmph_hash<HashFcn>::hash_function> table_;
// TODO(davi) optimize slack to not hold a copy of the key // TODO(davi) optimize slack to not hold a copy of the key
typedef typename std::unordered_map<Key, cmph_uint32, HashFcn, EqualKey, Alloc> slack_type; typedef typename std::unordered_map<Key, uint32_t, HashFcn, EqualKey, Alloc> slack_type;
slack_type slack_; slack_type slack_;
}; };

View File

@ -1,29 +1,30 @@
#include <stdint.h> // for uint32_t and friends
#include <cstdlib> #include <cstdlib>
#include <unordered_map> // for std::hash #include <unordered_map> // for std::hash
#include "MurmurHash2.h" #include "MurmurHash2.h"
#include "stringpiece.h" #include "stringpiece.h"
#include "cmph_types.h"
namespace cxxmph { namespace cxxmph {
template <class HashFcn> template <class HashFcn>
struct seeded_hash_function { struct seeded_hash_function {
template <class Key> template <class Key>
cmph_uint32 operator()(const Key& k, cmph_uint32 seed) const { uint32_t operator()(const Key& k, uint32_t seed) const {
return HashFcn()(k) ^ seed; return HashFcn()(k) ^ seed;
} }
}; };
struct Murmur2 { struct Murmur2 {
template<class Key> template<class Key>
cmph_uint32 operator()(const Key& k) const { uint32_t operator()(const Key& k) const {
return MurmurHash2(k, sizeof(Key), 1 /* seed */); return MurmurHash2(k, sizeof(Key), 1 /* seed */);
} }
}; };
struct Murmur2StringPiece { struct Murmur2StringPiece {
template <class Key> template <class Key>
cmph_uint32 operator()(const Key& k) const { uint32_t operator()(const Key& k) const {
StringPiece s(k); StringPiece s(k);
return MurmurHash2(s.data(), s.length(), 1 /* seed */); return MurmurHash2(s.data(), s.length(), 1 /* seed */);
} }
@ -32,7 +33,7 @@ struct Murmur2StringPiece {
template <> template <>
struct seeded_hash_function<Murmur2> { struct seeded_hash_function<Murmur2> {
template <class Key> template <class Key>
cmph_uint32 operator()(const Key& k, cmph_uint32 seed) const { uint32_t operator()(const Key& k, uint32_t seed) const {
return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), seed); return MurmurHash2(reinterpret_cast<const void*>(&k), sizeof(Key), seed);
} }
}; };
@ -40,42 +41,42 @@ struct seeded_hash_function<Murmur2> {
template <> template <>
struct seeded_hash_function<Murmur2StringPiece> { struct seeded_hash_function<Murmur2StringPiece> {
template <class Key> template <class Key>
cmph_uint32 operator()(const Key& k, cmph_uint32 seed) const { uint32_t operator()(const Key& k, uint32_t seed) const {
StringPiece s(k); StringPiece s(k);
return MurmurHash2(s.data(), s.length(), seed); return MurmurHash2(s.data(), s.length(), seed);
} }
}; };
template <class HashFcn> struct OptimizedSeededHashFunction template <class HashFcn> struct cxxmph_hash
{ typedef seeded_hash_function<HashFcn> hash_function; }; { typedef seeded_hash_function<HashFcn> hash_function; };
// Use Murmur2 instead for all types defined in std::hash, plus // Use Murmur2 instead for all types defined in std::hash, plus
// std::string which is commonly extended. // std::string which is commonly extended.
template <> struct OptimizedSeededHashFunction<std::hash<char*> > template <> struct cxxmph_hash<std::hash<char*> >
{ typedef seeded_hash_function<Murmur2StringPiece> hash_function; }; { typedef seeded_hash_function<Murmur2StringPiece> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<const char*> > template <> struct cxxmph_hash<std::hash<const char*> >
{ typedef seeded_hash_function<Murmur2StringPiece> hash_function; }; { typedef seeded_hash_function<Murmur2StringPiece> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<std::string> > template <> struct cxxmph_hash<std::hash<std::string> >
{ typedef seeded_hash_function<Murmur2StringPiece> hash_function; }; { typedef seeded_hash_function<Murmur2StringPiece> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<char> > template <> struct cxxmph_hash<std::hash<char> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<unsigned char> > template <> struct cxxmph_hash<std::hash<unsigned char> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<short> > template <> struct cxxmph_hash<std::hash<short> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<unsigned short> > template <> struct cxxmph_hash<std::hash<unsigned short> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<int> > template <> struct cxxmph_hash<std::hash<int> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<unsigned int> > template <> struct cxxmph_hash<std::hash<unsigned int> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<long> > template <> struct cxxmph_hash<std::hash<long> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<unsigned long> > template <> struct cxxmph_hash<std::hash<unsigned long> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<long long> > template <> struct cxxmph_hash<std::hash<long long> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
template <> struct OptimizedSeededHashFunction<std::hash<unsigned long long> > template <> struct cxxmph_hash<std::hash<unsigned long long> >
{ typedef seeded_hash_function<Murmur2> hash_function; }; { typedef seeded_hash_function<Murmur2> hash_function; };
} // namespace cxxmph } // namespace cxxmph

View File

@ -10,9 +10,9 @@ using std::vector;
namespace { namespace {
static const cmph_uint8 kUnassigned = 3; static const uint8_t kUnassigned = 3;
// table used for looking up the number of assigned vertices to a 8-bit integer // table used for looking up the number of assigned vertices to a 8-bit integer
static cmph_uint8 kBdzLookupTable[] = static uint8_t kBdzLookupTable[] =
{ {
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
@ -36,20 +36,20 @@ static cmph_uint8 kBdzLookupTable[] =
namespace cxxmph { namespace cxxmph {
const cmph_uint8 MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; const uint8_t MPHTable::valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
void MPHTable::clear() { void MPHTable::clear() {
// TODO(davi) implement me // TODO(davi) implement me
} }
bool MPHTable::GenerateQueue( bool MPHTable::GenerateQueue(
TriGraph* graph, vector<cmph_uint32>* queue_output) { TriGraph* graph, vector<uint32_t>* queue_output) {
cmph_uint32 queue_head = 0, queue_tail = 0; uint32_t queue_head = 0, queue_tail = 0;
cmph_uint32 nedges = m_; uint32_t nedges = m_;
cmph_uint32 nvertices = n_; uint32_t nvertices = n_;
// Relies on vector<bool> using 1 bit per element // Relies on vector<bool> using 1 bit per element
vector<bool> marked_edge(nedges + 1, false); vector<bool> marked_edge(nedges + 1, false);
vector<cmph_uint32> queue(nvertices, 0); vector<uint32_t> queue(nvertices, 0);
for (cmph_uint32 i = 0; i < nedges; ++i) { for (uint32_t i = 0; i < nedges; ++i) {
const TriGraph::Edge& e = graph->edges()[i]; const TriGraph::Edge& e = graph->edges()[i];
if (graph->vertex_degree()[e[0]] == 1 || if (graph->vertex_degree()[e[0]] == 1 ||
graph->vertex_degree()[e[1]] == 1 || graph->vertex_degree()[e[1]] == 1 ||
@ -62,7 +62,7 @@ bool MPHTable::GenerateQueue(
} }
/* /*
for (unsigned int i = 0; i < marked_edge.size(); ++i) { for (unsigned int i = 0; i < marked_edge.size(); ++i) {
cerr << "vertex with degree " << static_cast<cmph_uint32>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl; cerr << "vertex with degree " << static_cast<uint32_t>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
} }
for (unsigned int i = 0; i < queue.size(); ++i) { for (unsigned int i = 0; i < queue.size(); ++i) {
cerr << "vertex " << i << " queued at " << queue[i] << endl; cerr << "vertex " << i << " queued at " << queue[i] << endl;
@ -73,13 +73,13 @@ bool MPHTable::GenerateQueue(
// cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl; // cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
// graph->DebugGraph(); // graph->DebugGraph();
while (queue_tail != queue_head) { while (queue_tail != queue_head) {
cmph_uint32 current_edge = queue[queue_tail++]; uint32_t current_edge = queue[queue_tail++];
graph->RemoveEdge(current_edge); graph->RemoveEdge(current_edge);
const TriGraph::Edge& e = graph->edges()[current_edge]; const TriGraph::Edge& e = graph->edges()[current_edge];
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
cmph_uint32 v = e[i]; uint32_t v = e[i];
if (graph->vertex_degree()[v] == 1) { if (graph->vertex_degree()[v] == 1) {
cmph_uint32 first_edge = graph->first_edge()[v]; uint32_t first_edge = graph->first_edge()[v];
if (!marked_edge[first_edge]) { if (!marked_edge[first_edge]) {
queue[queue_head++] = first_edge; queue[queue_head++] = first_edge;
marked_edge[first_edge] = true; marked_edge[first_edge] = true;
@ -98,14 +98,14 @@ bool MPHTable::GenerateQueue(
} }
void MPHTable::Assigning( void MPHTable::Assigning(
const vector<TriGraph::Edge>& edges, const vector<cmph_uint32>& queue) { const vector<TriGraph::Edge>& edges, const vector<uint32_t>& queue) {
cmph_uint32 current_edge = 0; uint32_t current_edge = 0;
vector<bool> marked_vertices(n_ + 1); vector<bool> marked_vertices(n_ + 1);
// Initialize vector of half nibbles with all bits set. // Initialize vector of half nibbles with all bits set.
cmph_uint32 sizeg = static_cast<cmph_uint32>(ceil(n_/4.0)); uint32_t sizeg = static_cast<uint32_t>(ceil(n_/4.0));
vector<cmph_uint8>(sizeg, std::numeric_limits<cmph_uint8>::max()).swap(g_); vector<uint8_t>(sizeg, std::numeric_limits<uint8_t>::max()).swap(g_);
cmph_uint32 nedges = m_; // for legibility uint32_t nedges = m_; // for legibility
for (int i = nedges - 1; i + 1 >= 1; --i) { for (int i = nedges - 1; i + 1 >= 1; --i) {
current_edge = queue[i]; current_edge = queue[i];
const TriGraph::Edge& e = edges[current_edge]; const TriGraph::Edge& e = edges[current_edge];
@ -144,20 +144,20 @@ void MPHTable::Assigning(
} }
void MPHTable::Ranking() { void MPHTable::Ranking() {
cmph_uint32 nbytes_total = static_cast<cmph_uint32>(ceil(n_ / 4.0)); uint32_t nbytes_total = static_cast<uint32_t>(ceil(n_ / 4.0));
cmph_uint32 size = k_ >> 2U; uint32_t size = k_ >> 2U;
cmph_uint32 ranktablesize = static_cast<cmph_uint32>( uint32_t ranktablesize = static_cast<uint32_t>(
ceil(n_ / static_cast<double>(k_))); ceil(n_ / static_cast<double>(k_)));
// TODO(davi) Change swap of member classes for resize + memset to avoid // TODO(davi) Change swap of member classes for resize + memset to avoid
// fragmentation // fragmentation
vector<cmph_uint32> (ranktablesize).swap(ranktable_);; vector<uint32_t> (ranktablesize).swap(ranktable_);;
cmph_uint32 offset = 0; uint32_t offset = 0;
cmph_uint32 count = 0; uint32_t count = 0;
cmph_uint32 i = 1; uint32_t i = 1;
while (1) { while (1) {
if (i == ranktable_.size()) break; if (i == ranktable_.size()) break;
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total; uint32_t nbytes = size < nbytes_total ? size : nbytes_total;
for (cmph_uint32 j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]]; for (uint32_t j = 0; j < nbytes; ++j) count += kBdzLookupTable[g_[offset + j]];
ranktable_[i] = count; ranktable_[i] = count;
offset += nbytes; offset += nbytes;
nbytes_total -= size; nbytes_total -= size;
@ -165,12 +165,12 @@ void MPHTable::Ranking() {
} }
} }
cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const { uint32_t MPHTable::Rank(uint32_t vertex) const {
cmph_uint32 index = vertex >> b_; uint32_t index = vertex >> b_;
cmph_uint32 base_rank = ranktable_[index]; uint32_t base_rank = ranktable_[index];
cmph_uint32 beg_idx_v = index << b_; uint32_t beg_idx_v = index << b_;
cmph_uint32 beg_idx_b = beg_idx_v >> 2; uint32_t beg_idx_b = beg_idx_v >> 2;
cmph_uint32 end_idx_b = vertex >> 2; uint32_t end_idx_b = vertex >> 2;
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]]; while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
beg_idx_v = beg_idx_b << 2; beg_idx_v = beg_idx_b << 2;
// cerr << "beg_idx_v: " << beg_idx_v << endl; // cerr << "beg_idx_v: " << beg_idx_v << endl;

View File

@ -3,6 +3,8 @@
// Minimal perfect hash abstraction implementing the BDZ algorithm // Minimal perfect hash abstraction implementing the BDZ algorithm
#include <stdint.h>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <unordered_map> // for std::hash #include <unordered_map> // for std::hash
@ -13,61 +15,61 @@
using std::cerr; using std::cerr;
using std::endl; using std::endl;
#include "cmph_hash_function.h" #include "cxxmph_hash.h"
#include "trigraph.h" #include "trigraph.h"
namespace cxxmph { namespace cxxmph {
class MPHTable { class MPHTable {
public: public:
MPHTable(double c = 1.23, cmph_uint8 b = 7) : MPHTable(double c = 1.23, uint8_t b = 7) :
c_(c), b_(b), m_(0), n_(0), k_(0), r_(0) { } c_(c), b_(b), m_(0), n_(0), k_(0), r_(0) { }
~MPHTable() {} ~MPHTable() {}
template <class SeededHashFcn, class ForwardIterator> template <class SeededHashFcn, class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end); bool Reset(ForwardIterator begin, ForwardIterator end);
template <class SeededHashFcn, class Key> // must agree with Reset template <class SeededHashFcn, class Key> // must agree with Reset
cmph_uint32 index(const Key& x) const; uint32_t index(const Key& x) const;
cmph_uint32 size() const { return m_; } uint32_t size() const { return m_; }
void clear(); void clear();
private: private:
template <class SeededHashFcn, class ForwardIterator> template <class SeededHashFcn, class ForwardIterator>
bool Mapping(ForwardIterator begin, ForwardIterator end, bool Mapping(ForwardIterator begin, ForwardIterator end,
std::vector<TriGraph::Edge>* edges, std::vector<TriGraph::Edge>* edges,
std::vector<cmph_uint32>* queue); std::vector<uint32_t>* queue);
bool GenerateQueue(TriGraph* graph, std::vector<cmph_uint32>* queue); bool GenerateQueue(TriGraph* graph, std::vector<uint32_t>* queue);
void Assigning(const std::vector<TriGraph::Edge>& edges, void Assigning(const std::vector<TriGraph::Edge>& edges,
const std::vector<cmph_uint32>& queue); const std::vector<uint32_t>& queue);
void Ranking(); void Ranking();
cmph_uint32 Rank(cmph_uint32 vertex) const; uint32_t Rank(uint32_t vertex) const;
// Algorithm parameters // Algorithm parameters
double c_; // Number of bits per key (? is it right) double c_; // Number of bits per key (? is it right)
cmph_uint8 b_; // Number of bits of the kth index in the ranktable uint8_t b_; // Number of bits of the kth index in the ranktable
// Values used during generation // Values used during generation
cmph_uint32 m_; // edges count uint32_t m_; // edges count
cmph_uint32 n_; // vertex count uint32_t n_; // vertex count
cmph_uint32 k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$ uint32_t k_; // kth index in ranktable, $k = log_2(n=3r)\varepsilon$
// Values used during search // Values used during search
// Partition vertex count, derived from c parameter. // Partition vertex count, derived from c parameter.
cmph_uint32 r_; uint32_t r_;
// The array containing the minimal perfect hash function graph. // The array containing the minimal perfect hash function graph.
std::vector<cmph_uint8> g_; std::vector<uint8_t> g_;
// The table used for the rank step of the minimal perfect hash function // The table used for the rank step of the minimal perfect hash function
std::vector<cmph_uint32> ranktable_; std::vector<uint32_t> ranktable_;
// The selected hash seed triplet for finding the edges in the minimal // The selected hash seed triplet for finding the edges in the minimal
// perfect hash function graph. // perfect hash function graph.
cmph_uint32 hash_seed_[3]; uint32_t hash_seed_[3];
static const cmph_uint8 valuemask[]; static const uint8_t valuemask[];
static void set_2bit_value(std::vector<cmph_uint8> *d, cmph_uint32 i, cmph_uint8 v) { static void set_2bit_value(std::vector<uint8_t> *d, uint32_t i, uint8_t v) {
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3]; (*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
} }
static cmph_uint32 get_2bit_value(const std::vector<cmph_uint8>& d, cmph_uint32 i) { static uint32_t get_2bit_value(const std::vector<uint8_t>& d, uint32_t i) {
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3; return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
} }
@ -78,7 +80,7 @@ class MPHTable {
template <class SeededHashFcn, class ForwardIterator> template <class SeededHashFcn, class ForwardIterator>
bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) { bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
m_ = end - begin; m_ = end - begin;
r_ = static_cast<cmph_uint32>(ceil((c_*m_)/3)); r_ = static_cast<uint32_t>(ceil((c_*m_)/3));
if ((r_ % 2) == 0) r_ += 1; if ((r_ % 2) == 0) r_ += 1;
n_ = 3*r_; n_ = 3*r_;
k_ = 1U << b_; k_ = 1U << b_;
@ -87,7 +89,7 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
int iterations = 10; int iterations = 10;
std::vector<TriGraph::Edge> edges; std::vector<TriGraph::Edge> edges;
std::vector<cmph_uint32> queue; std::vector<uint32_t> queue;
while (1) { while (1) {
cerr << "Iterations missing: " << iterations << endl; cerr << "Iterations missing: " << iterations << endl;
for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_; for (int i = 0; i < 3; ++i) hash_seed_[i] = random() % m_;
@ -106,14 +108,14 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
template <class SeededHashFcn, class ForwardIterator> template <class SeededHashFcn, class ForwardIterator>
bool MPHTable::Mapping( bool MPHTable::Mapping(
ForwardIterator begin, ForwardIterator end, ForwardIterator begin, ForwardIterator end,
std::vector<TriGraph::Edge>* edges, std::vector<cmph_uint32>* queue) { std::vector<TriGraph::Edge>* edges, std::vector<uint32_t>* queue) {
TriGraph graph(n_, m_); TriGraph graph(n_, m_);
for (ForwardIterator it = begin; it != end; ++it) { for (ForwardIterator it = begin; it != end; ++it) {
cmph_uint32 h[3]; uint32_t h[3];
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]); for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(*it, hash_seed_[i]);
cmph_uint32 v0 = h[0] % r_; uint32_t v0 = h[0] % r_;
cmph_uint32 v1 = h[1] % r_ + r_; uint32_t v1 = h[1] % r_ + r_;
cmph_uint32 v2 = h[2] % r_ + (r_ << 1); uint32_t v2 = h[2] % r_ + (r_ << 1);
// cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl; // cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
graph.AddEdge(TriGraph::Edge(v0, v1, v2)); graph.AddEdge(TriGraph::Edge(v0, v1, v2));
} }
@ -125,8 +127,8 @@ bool MPHTable::Mapping(
} }
template <class SeededHashFcn, class Key> template <class SeededHashFcn, class Key>
cmph_uint32 MPHTable::index(const Key& key) const { uint32_t MPHTable::index(const Key& key) const {
cmph_uint32 h[3]; uint32_t h[3];
for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]); for (int i = 0; i < 3; ++i) h[i] = SeededHashFcn()(key, hash_seed_[i]);
h[0] = h[0] % r_; h[0] = h[0] % r_;
h[1] = h[1] % r_ + r_; h[1] = h[1] % r_ + r_;
@ -136,19 +138,19 @@ cmph_uint32 MPHTable::index(const Key& key) const {
assert((h[0] >> 2) <g_.size()); assert((h[0] >> 2) <g_.size());
assert((h[1] >> 2) <g_.size()); assert((h[1] >> 2) <g_.size());
assert((h[2] >> 2) <g_.size()); assert((h[2] >> 2) <g_.size());
cmph_uint32 vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3]; uint32_t vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
// cerr << "Search found vertex " << vertex << endl; // cerr << "Search found vertex " << vertex << endl;
return Rank(vertex); return Rank(vertex);
} }
template <class Key, class HashFcn = typename OptimizedSeededHashFunction<std::hash<Key> >::hash_function> template <class Key, class HashFcn = typename cxxmph_hash<std::hash<Key> >::hash_function>
class SimpleMPHTable : public MPHTable { class SimpleMPHTable : public MPHTable {
public: public:
template <class ForwardIterator> template <class ForwardIterator>
bool Reset(ForwardIterator begin, ForwardIterator end) { bool Reset(ForwardIterator begin, ForwardIterator end) {
return MPHTable::Reset<HashFcn>(begin, end); return MPHTable::Reset<HashFcn>(begin, end);
} }
cmph_uint32 index(const Key& key) { return MPHTable::index<HashFcn>(key); } uint32_t index(const Key& key) { return MPHTable::index<HashFcn>(key); }
}; };
} // namespace cxxmph } // namespace cxxmph

View File

@ -9,12 +9,12 @@ using std::endl;
using std::vector; using std::vector;
namespace { namespace {
static const cmph_uint32 kInvalidEdge = std::numeric_limits<cmph_uint32>::max(); static const uint32_t kInvalidEdge = std::numeric_limits<uint32_t>::max();
} }
namespace cxxmph { namespace cxxmph {
TriGraph::TriGraph(cmph_uint32 nvertices, cmph_uint32 nedges) TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges)
: nedges_(0), : nedges_(0),
edges_(nedges), edges_(nedges),
next_edge_(nedges), next_edge_(nedges),
@ -23,8 +23,8 @@ TriGraph::TriGraph(cmph_uint32 nvertices, cmph_uint32 nedges)
void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) { void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
vector<Edge>().swap(next_edge_); vector<Edge>().swap(next_edge_);
vector<cmph_uint32>().swap(first_edge_); vector<uint32_t>().swap(first_edge_);
vector<cmph_uint8>().swap(vertex_degree_); vector<uint8_t>().swap(vertex_degree_);
nedges_ = 0; nedges_ = 0;
edges->swap(edges_); edges->swap(edges_);
} }
@ -45,13 +45,13 @@ void TriGraph::AddEdge(const Edge& edge) {
++nedges_; ++nedges_;
} }
void TriGraph::RemoveEdge(cmph_uint32 current_edge) { void TriGraph::RemoveEdge(uint32_t current_edge) {
// cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl; // cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
cmph_uint32 vertex = edges_[current_edge][i]; uint32_t vertex = edges_[current_edge][i];
cmph_uint32 edge1 = first_edge_[vertex]; uint32_t edge1 = first_edge_[vertex];
cmph_uint32 edge2 = kInvalidEdge; uint32_t edge2 = kInvalidEdge;
cmph_uint32 j = 0; uint32_t j = 0;
while (edge1 != current_edge && edge1 != kInvalidEdge) { while (edge1 != current_edge && edge1 != kInvalidEdge) {
edge2 = edge1; edge2 = edge1;
if (edges_[edge1][0] == vertex) j = 0; if (edges_[edge1][0] == vertex) j = 0;

View File

@ -6,42 +6,41 @@
// required. For each vertex, we store how many edges touch it (degree) and the // required. For each vertex, we store how many edges touch it (degree) and the
// index of the first edge in the vector of triples representing the edges. // index of the first edge in the vector of triples representing the edges.
#include <stdint.h> // for uint32_t and friends
#include <vector> #include <vector>
#include "cmph_types.h"
namespace cxxmph { namespace cxxmph {
class TriGraph { class TriGraph {
public: public:
struct Edge { struct Edge {
Edge() { } Edge() { }
Edge(cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2) { Edge(uint32_t v0, uint32_t v1, uint32_t v2) {
vertices[0] = v0; vertices[0] = v0;
vertices[1] = v1; vertices[1] = v1;
vertices[2] = v2; vertices[2] = v2;
} }
cmph_uint32& operator[](cmph_uint8 v) { return vertices[v]; } uint32_t& operator[](uint8_t v) { return vertices[v]; }
const cmph_uint32& operator[](cmph_uint8 v) const { return vertices[v]; } const uint32_t& operator[](uint8_t v) const { return vertices[v]; }
cmph_uint32 vertices[3]; uint32_t vertices[3];
}; };
TriGraph(cmph_uint32 nedges, cmph_uint32 nvertices); TriGraph(uint32_t nedges, uint32_t nvertices);
void AddEdge(const Edge& edge); void AddEdge(const Edge& edge);
void RemoveEdge(cmph_uint32 edge_id); void RemoveEdge(uint32_t edge_id);
void ExtractEdgesAndClear(std::vector<Edge>* edges); void ExtractEdgesAndClear(std::vector<Edge>* edges);
void DebugGraph() const; void DebugGraph() const;
const std::vector<Edge>& edges() const { return edges_; } const std::vector<Edge>& edges() const { return edges_; }
const std::vector<cmph_uint8>& vertex_degree() const { return vertex_degree_; } const std::vector<uint8_t>& vertex_degree() const { return vertex_degree_; }
const std::vector<cmph_uint32>& first_edge() const { return first_edge_; } const std::vector<uint32_t>& first_edge() const { return first_edge_; }
private: private:
cmph_uint32 nedges_; // total number of edges uint32_t nedges_; // total number of edges
std::vector<Edge> edges_; std::vector<Edge> edges_;
std::vector<Edge> next_edge_; // for implementing removal std::vector<Edge> next_edge_; // for implementing removal
std::vector<cmph_uint32> first_edge_; // the first edge for this vertex std::vector<uint32_t> first_edge_; // the first edge for this vertex
std::vector<cmph_uint8> vertex_degree_; // number of edges for this vertex std::vector<uint8_t> vertex_degree_; // number of edges for this vertex
}; };
} // namespace cxxmph } // namespace cxxmph

View File

@ -1,22 +0,0 @@
#include <cassert>
#include "trigraph.h"
using cxxmph::TriGraph;
// Smoke test for TriGraph: adds two edges, checks the per-vertex degree
// counters, removes one edge, re-checks the counters, and finally extracts
// the edge list. Any failed expectation aborts through assert().
int main(int argc, char** argv) {
// NOTE(review): the arguments are presumably (nvertices = 4, nedges = 2), the
// order used by the constructor definition in trigraph.cc; the declaration in
// trigraph.h names them the other way round -- confirm.
TriGraph g(4, 2);
// Two edges that share vertices 1 and 2.
g.AddEdge(TriGraph::Edge(0, 1, 2));
g.AddEdge(TriGraph::Edge(1, 3, 2));
// Vertices 1 and 2 appear in both edges, vertices 0 and 3 in one edge each.
assert(g.vertex_degree()[0] == 1);
assert(g.vertex_degree()[1] == 2);
assert(g.vertex_degree()[2] == 2);
assert(g.vertex_degree()[3] == 1);
// After removing edge id 0 the degrees of vertices 0, 1 and 2 are expected to
// drop by one each, while vertex 3 keeps its degree.
g.RemoveEdge(0);
assert(g.vertex_degree()[0] == 0);
assert(g.vertex_degree()[1] == 1);
assert(g.vertex_degree()[2] == 1);
assert(g.vertex_degree()[3] == 1);
// Exercise ExtractEdgesAndClear; the extracted contents are not inspected.
std::vector<TriGraph::Edge> edges;
g.ExtractEdgesAndClear(&edges);
}

View File

@ -1,4 +1,5 @@
bin_PROGRAMS = cmph bin_PROGRAMS = cmph
check_PROGRAMS = cmph_benchmark_test
lib_LTLIBRARIES = libcmph.la lib_LTLIBRARIES = libcmph.la
include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h
libcmph_la_SOURCES = hash.h hash.c \ libcmph_la_SOURCES = hash.h hash.c \
@ -30,3 +31,6 @@ libcmph_la_LDFLAGS = -version-info 0:0:0
cmph_SOURCES = main.c wingetopt.h wingetopt.c cmph_SOURCES = main.c wingetopt.h wingetopt.c
cmph_LDADD = libcmph.la cmph_LDADD = libcmph.la
cmph_benchmark_test_SOURCES = cmph_benchmark_test.cc
cmph_benchmark_test_LDADD = libcmph.la

View File

@ -1,3 +1,5 @@
// A simple benchmark tool around getrusage
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -42,17 +44,19 @@ int timeval_subtract (
benchmark_t* find_benchmark(const char* name) { benchmark_t* find_benchmark(const char* name) {
benchmark_t* benchmark = global_benchmarks; benchmark_t* benchmark = global_benchmarks;
while (benchmark->name != NULL) if (strcmp(benchmark->name, name) != 0) break; while (benchmark && benchmark->name != NULL) {
if (!benchmark->name) return NULL; if (strcmp(benchmark->name, name) == 0) break;
++benchmark;
}
if (!benchmark || !benchmark->name) return NULL;
return benchmark; return benchmark;
} }
int global_benchmarks_length() { int global_benchmarks_length() {
benchmark_t* benchmark; benchmark_t* benchmark = global_benchmarks;
int length = 0; int length = 0;
if (global_benchmarks == 0) return 0; if (benchmark == NULL) return 0;
benchmark = global_benchmarks; while (benchmark->name != NULL) ++length, ++benchmark;
while (benchmark->name != NULL) ++length;
return length; return length;
} }
@ -62,8 +66,11 @@ void bm_register(const char* name, void (*func)(int), int iters) {
benchmark.name = name; benchmark.name = name;
benchmark.func = func; benchmark.func = func;
assert(!find_benchmark(name)); assert(!find_benchmark(name));
global_benchmarks = realloc(global_benchmarks, length + 1); global_benchmarks = realloc(
global_benchmarks, (length + 2)*sizeof(benchmark_t));
global_benchmarks[length] = benchmark; global_benchmarks[length] = benchmark;
memset(&benchmark, 0, sizeof(benchmark_t)); // pivot
global_benchmarks[length + 1] = benchmark;
} }
void bm_start(const char* name) { void bm_start(const char* name) {
@ -71,6 +78,7 @@ void bm_start(const char* name) {
struct rusage rs; struct rusage rs;
benchmark = find_benchmark(name); benchmark = find_benchmark(name);
assert(benchmark);
int ret = getrusage(RUSAGE_SELF, &rs); int ret = getrusage(RUSAGE_SELF, &rs);
if (ret != 0) { if (ret != 0) {
perror("rusage failed"); perror("rusage failed");
@ -98,6 +106,19 @@ void bm_end(const char* name) {
struct timeval stime; struct timeval stime;
timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime); timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime);
printf("User cpu time used: %ld.%6ld\n", utime.tv_sec, utime.tv_usec); printf("Benchmark: %s\n", benchmark->name);
printf("System cpu time used: %ld.%6ld\n", stime.tv_sec, stime.tv_usec); printf("User time used : %ld.%6ld\n", utime.tv_sec, utime.tv_usec);
printf("System time used: %ld.%6ld\n", stime.tv_sec, stime.tv_usec);
printf("Wall time used : %ld.%6ld\n", stime.tv_sec, stime.tv_usec);
printf("\n");
} }
// Walks the global benchmark array from its first entry up to the entry
// whose name is NULL (or does nothing when the array pointer itself is NULL)
// and, for each entry, calls bm_start followed by bm_end with that entry's
// name. argc and argv are accepted but not used.
//
// NOTE(review): this function does not call the entry's func pointer
// itself; confirm whether bm_start/bm_end are expected to run it.
void run_benchmarks(int argc, char** argv) {
  benchmark_t* current;
  for (current = global_benchmarks;
       current && current->name != NULL;
       ++current) {
    bm_start(current->name);
    bm_end(current->name);
  }
}

View File

@ -9,7 +9,7 @@ extern "C"
{ {
#endif #endif
#define BM_REGISTER(func, iters) bm_register(##func, func, iters); #define BM_REGISTER(func, iters) bm_register(#func, func, iters)
void bm_register(const char* name, void (*func)(int), int iters); void bm_register(const char* name, void (*func)(int), int iters);
void run_benchmarks(int argc, char** argv); void run_benchmarks(int argc, char** argv);

View File

@ -0,0 +1,22 @@
#include <unistd.h> // for sleep
#include <limits.h>
#include "cmph_benchmark.h"
// Benchmark body that blocks the calling thread for one second via sleep().
// `iters` is part of the void(int) signature passed to BM_REGISTER and is
// not consulted.
void bm_sleep(int iters) {
  (void)iters;
  const unsigned int seconds_to_sleep = 1;
  sleep(seconds_to_sleep);
}
// Benchmark body: a CPU-bound loop that adds every integer in [0, INT_MAX)
// into an accumulator. `iters` is part of the void(int) signature passed to
// BM_REGISTER and is not consulted.
void bm_increment(int iters) {
  (void)iters;
  // The running sum exceeds INT_MAX long before the loop finishes. An
  // unsigned accumulator wraps around with well-defined semantics, whereas
  // overflowing a signed int is undefined behaviour. `volatile` forces every
  // addition to be performed, so the optimizer cannot discard the loop whose
  // result is otherwise unused.
  volatile unsigned int v = 0;
  for (int i = 0; i < INT_MAX; ++i) {
    v = v + static_cast<unsigned int>(i);
  }
}
// Registers the two benchmark bodies defined in this file, each with an
// iteration count of 1, then passes the command line to run_benchmarks.
int main(int argc, char** argv) {
  BM_REGISTER(bm_sleep, 1);
  BM_REGISTER(bm_increment, 1);

  run_benchmarks(argc, argv);
  return 0;
}