1
Fork 0

It works.

This commit is contained in:
Davi de Castro Reis 2010-10-27 19:45:43 -07:00
parent 385ce27a10
commit 22d149d3a8
6 changed files with 80 additions and 18 deletions

View File

@ -36,7 +36,7 @@ static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
void set_2bit_value(vector<cmph_uint8> *d, cmph_uint8 i, cmph_uint8 v) { void set_2bit_value(vector<cmph_uint8> *d, cmph_uint8 i, cmph_uint8 v) {
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3]; (*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
} }
cmph_uint8 get_2bit_value(const vector<cmph_uint8>& d, cmph_uint8 i) { cmph_uint32 get_2bit_value(const vector<cmph_uint8>& d, cmph_uint8 i) {
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3; return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
} }
@ -50,7 +50,7 @@ bool MPHTable::GenerateQueue(
cmph_uint32 nedges = m_; cmph_uint32 nedges = m_;
cmph_uint32 nvertices = n_; cmph_uint32 nvertices = n_;
// Relies on vector<bool> using 1 bit per element // Relies on vector<bool> using 1 bit per element
vector<bool> marked_edge((nedges >> 3) + 1, false); vector<bool> marked_edge(nedges + 1, false);
vector<cmph_uint32> queue(nvertices, 0); vector<cmph_uint32> queue(nvertices, 0);
for (cmph_uint32 i = 0; i < nedges; ++i) { for (cmph_uint32 i = 0; i < nedges; ++i) {
const TriGraph::Edge& e = graph->edges()[i]; const TriGraph::Edge& e = graph->edges()[i];
@ -63,6 +63,15 @@ bool MPHTable::GenerateQueue(
} }
} }
} }
for (unsigned int i = 0; i < marked_edge.size(); ++i) {
cerr << "vertex with degree " << static_cast<cmph_uint32>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
}
for (unsigned int i = 0; i < queue.size(); ++i) {
cerr << "vertex " << i << " queued at " << queue[i] << endl;
}
// At this point queue head is the number of edges touching at least one
// vertex of degree 1.
cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
while (queue_tail != queue_head) { while (queue_tail != queue_head) {
cmph_uint32 current_edge = queue[queue_tail++]; cmph_uint32 current_edge = queue[queue_tail++];
graph->RemoveEdge(current_edge); graph->RemoveEdge(current_edge);
@ -78,6 +87,9 @@ bool MPHTable::GenerateQueue(
} }
} }
} }
for (unsigned int i = 0; i < queue.size(); ++i) {
cerr << "vertex " << i << " queued at " << queue[i] << endl;
}
int cycles = queue_head - nedges; int cycles = queue_head - nedges;
if (cycles == 0) queue.swap(*queue_output); if (cycles == 0) queue.swap(*queue_output);
return cycles == 0; return cycles == 0;
@ -85,14 +97,21 @@ bool MPHTable::GenerateQueue(
void MPHTable::Assigning( void MPHTable::Assigning(
const vector<TriGraph::Edge>& edges, const vector<cmph_uint32>& queue) { const vector<TriGraph::Edge>& edges, const vector<cmph_uint32>& queue) {
cmph_uint32 nedges = n_; cmph_uint32 nedges = m_;
cmph_uint32 current_edge = 0; cmph_uint32 current_edge = 0;
vector<bool> marked_vertices(nedges + 1); vector<bool> marked_vertices(nedges + 1);
// Initialize vector of half nibbles with all bits set. // Initialize vector of half nibbles with all bits set.
vector<cmph_uint8>(nedges, std::numeric_limits<cmph_uint8>::max()).swap(g_); cmph_uint32 sizeg = static_cast<cmph_uint32>(ceil(n_/4.0));
vector<cmph_uint8>(sizeg, std::numeric_limits<cmph_uint8>::max()).swap(g_);
for (int i = nedges - 1; i + 1 >= 1; --i) { for (int i = nedges - 1; i + 1 >= 1; --i) {
current_edge = queue[i]; current_edge = queue[i];
cerr << "Current edge " << current_edge << " at queue pos " << i << endl;
const TriGraph::Edge& e = edges[current_edge]; const TriGraph::Edge& e = edges[current_edge];
cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
<< get_2bit_value(g_, e[0]) << " "
<< get_2bit_value(g_, e[1]) << " "
<< get_2bit_value(g_, e[2]) << " " << endl;
if (!marked_vertices[e[0]]) { if (!marked_vertices[e[0]]) {
if (!marked_vertices[e[1]]) { if (!marked_vertices[e[1]]) {
set_2bit_value(&g_, e[1], kUnassigned); set_2bit_value(&g_, e[1], kUnassigned);
@ -115,6 +134,10 @@ void MPHTable::Assigning(
set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3); set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3);
marked_vertices[e[2]] = true; marked_vertices[e[2]] = true;
} }
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
<< get_2bit_value(g_, e[0]) << " "
<< get_2bit_value(g_, e[1]) << " "
<< get_2bit_value(g_, e[2]) << " " << endl;
} }
} }
@ -128,7 +151,7 @@ void MPHTable::Ranking() {
vector<cmph_uint32> (ranktablesize).swap(ranktable_);; vector<cmph_uint32> (ranktablesize).swap(ranktable_);;
cmph_uint32 offset = 0; cmph_uint32 offset = 0;
cmph_uint32 count = 0; cmph_uint32 count = 0;
cmph_uint32 i = 0; cmph_uint32 i = 1;
while (1) { while (1) {
if (i == ranktable_.size()) break; if (i == ranktable_.size()) break;
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total; cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total;
@ -142,11 +165,13 @@ void MPHTable::Ranking() {
cmph_uint32 MPHTable::Search(const key_type& key) const { cmph_uint32 MPHTable::Search(const key_type& key) const {
cmph_uint32 h[3]; cmph_uint32 h[3];
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key); // for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
hash_function_[0](key, h);
h[0] = h[0] % r_; h[0] = h[0] % r_;
h[1] = h[1] % r_ + r_; h[1] = h[1] % r_ + r_;
h[2] = h[2] % r_ + (r_ << 1); h[2] = h[2] % r_ + (r_ << 1);
cmph_uint32 vertex = h[(g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3]; cmph_uint32 vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
cerr << "Search found vertex " << vertex << endl;
return Rank(vertex); return Rank(vertex);
} }
@ -154,14 +179,23 @@ cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const {
cmph_uint32 index = vertex >> b_; cmph_uint32 index = vertex >> b_;
cmph_uint32 base_rank = ranktable_[index]; cmph_uint32 base_rank = ranktable_[index];
cmph_uint32 beg_idx_v = index << b_; cmph_uint32 beg_idx_v = index << b_;
cmph_uint32 beg_idx_b = index >> 2; cmph_uint32 beg_idx_b = beg_idx_v >> 2;
cmph_uint32 end_idx_b = index >> 2; cmph_uint32 end_idx_b = vertex >> 2;
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]]; while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
beg_idx_v = beg_idx_b << 2; beg_idx_v = beg_idx_b << 2;
cerr << "beg_idx_v: " << beg_idx_v << endl;
cerr << "base rank: " << base_rank << endl;
cerr << "G: ";
for (unsigned int i = 0; i < n_; ++i) {
cerr << get_2bit_value(g_, i) << " ";
}
while (beg_idx_v < vertex) { while (beg_idx_v < vertex) {
if (g_[beg_idx_v] != kUnassigned) ++base_rank; cerr << get_2bit_value(g_, beg_idx_v) << " ";
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank;
++beg_idx_v; ++beg_idx_v;
} }
cerr << "Base rank: " << base_rank << endl;
return base_rank; return base_rank;
} }

View File

@ -22,7 +22,7 @@ class MPHTable {
// This class could be a template for both key type and hash function, but we // This class could be a template for both key type and hash function, but we
// chose to go with simplicity. // chose to go with simplicity.
typedef StringPiece key_type; typedef StringPiece key_type;
typedef RandomlySeededHashFunction<Murmur2StringPiece> hasher_type; typedef RandomlySeededHashFunction<JenkinsStringPiece> hasher_type;
MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { } MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
~MPHTable() {} ~MPHTable() {}
@ -82,7 +82,9 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
std::vector<cmph_uint32> queue; std::vector<cmph_uint32> queue;
while (1) { while (1) {
cerr << "Iterations missing: " << iterations << endl; cerr << "Iterations missing: " << iterations << endl;
for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type(); // for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type();
hash_function_[0] = hasher_type();
cerr << "Seed: " << hash_function_[0].seed << endl;
if (Mapping(begin, end, &edges, &queue)) break; if (Mapping(begin, end, &edges, &queue)) break;
else --iterations; else --iterations;
if (iterations == 0) break; if (iterations == 0) break;
@ -101,11 +103,12 @@ bool MPHTable::Mapping(
TriGraph graph(n_, m_); TriGraph graph(n_, m_);
for (ForwardIterator it = begin; it != end; ++it) { for (ForwardIterator it = begin; it != end; ++it) {
cmph_uint32 h[3]; cmph_uint32 h[3];
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it); // for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
hash_function_[0](*it, h);
cmph_uint32 v0 = h[0] % r_; cmph_uint32 v0 = h[0] % r_;
cmph_uint32 v1 = h[1] % r_ + r_; cmph_uint32 v1 = h[1] % r_ + r_;
cmph_uint32 v2 = h[2] % r_ + (r_ << 1); cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
cerr << "Key: " << *it << " vertex " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl; cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
graph.AddEdge(TriGraph::Edge(v0, v1, v2)); graph.AddEdge(TriGraph::Edge(v0, v1, v2));
} }
if (GenerateQueue(&graph, queue)) { if (GenerateQueue(&graph, queue)) {

View File

@ -9,6 +9,8 @@ using std::vector;
using cxxmph::MPHTable; using cxxmph::MPHTable;
int main(int argc, char** argv) { int main(int argc, char** argv) {
srand(1);
vector<string> keys; vector<string> keys;
keys.push_back("davi"); keys.push_back("davi");
keys.push_back("paulo"); keys.push_back("paulo");

View File

@ -8,6 +8,7 @@
#include "../src/cmph_types.h" #include "../src/cmph_types.h"
#include "MurmurHash2.h" #include "MurmurHash2.h"
#include "jenkins_hash.h"
#include "stringpiece.h" #include "stringpiece.h"
namespace cxxmph { namespace cxxmph {
@ -15,9 +16,25 @@ namespace cxxmph {
template <class HashFun> template <class HashFun>
struct RandomlySeededHashFunction { }; struct RandomlySeededHashFunction { };
// Empty tag type: used as the template argument that selects the
// Jenkins-hash specialization of RandomlySeededHashFunction.
class JenkinsStringPiece { };
class Murmur2StringPiece { }; class Murmur2StringPiece { };
class Murmur2Pod { }; class Murmur2Pod { };
template <>
struct RandomlySeededHashFunction<JenkinsStringPiece> {
RandomlySeededHashFunction() {
srand(1);
seed = 4;
}
cmph_uint32 operator()(const StringPiece& key) const {
return jenkins_hash(key.data(), key.length(), seed);
}
void operator()(const StringPiece& key, cmph_uint32* hashes) const {
__jenkins_hash_vector(seed, key.data(), key.length(), hashes);
}
cmph_uint32 seed;
};
template <> template <>
struct RandomlySeededHashFunction<Murmur2StringPiece> { struct RandomlySeededHashFunction<Murmur2StringPiece> {
RandomlySeededHashFunction() : seed(random()) { } RandomlySeededHashFunction() : seed(random()) { }

View File

@ -9,7 +9,7 @@
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>
//#define DEBUG #define DEBUG
#include "debug.h" #include "debug.h"
#define UNASSIGNED 3U #define UNASSIGNED 3U
#define NULL_EDGE 0xffffffff #define NULL_EDGE 0xffffffff
@ -177,9 +177,11 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
} }
}; };
}; };
DEBUGP("Queue head %d Queue tail %d\n", queue_head, queue_tail);
while(queue_tail!=queue_head){ while(queue_tail!=queue_head){
curr_edge=queue[queue_tail++]; curr_edge=queue[queue_tail++];
bdz_remove_edge(graph3,curr_edge); bdz_remove_edge(graph3,curr_edge);
DEBUGP("Removing edge %d\n", curr_edge);
v0=graph3->edges[curr_edge].vertices[0]; v0=graph3->edges[curr_edge].vertices[0];
v1=graph3->edges[curr_edge].vertices[1]; v1=graph3->edges[curr_edge].vertices[1];
v2=graph3->edges[curr_edge].vertices[2]; v2=graph3->edges[curr_edge].vertices[2];
@ -403,6 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
h0 = hl[0] % bdz->r; h0 = hl[0] % bdz->r;
h1 = hl[1] % bdz->r + bdz->r; h1 = hl[1] % bdz->r + bdz->r;
h2 = hl[2] % bdz->r + (bdz->r << 1); h2 = hl[2] % bdz->r + (bdz->r << 1);
DEBUGP("Key: %s (%u %u %u)\n", key, h0, h1, h2);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
bdz_add_edge(graph3,h0,h1,h2); bdz_add_edge(graph3,h0,h1,h2);
} }
@ -427,7 +430,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
v0=graph3->edges[curr_edge].vertices[0]; v0=graph3->edges[curr_edge].vertices[0];
v1=graph3->edges[curr_edge].vertices[1]; v1=graph3->edges[curr_edge].vertices[1];
v2=graph3->edges[curr_edge].vertices[2]; v2=graph3->edges[curr_edge].vertices[2];
DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2)); DEBUGP("B:%u %u %u -- %u %u %u edge %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2), curr_edge);
if(!GETBIT(marked_vertices, v0)){ if(!GETBIT(marked_vertices, v0)){
if(!GETBIT(marked_vertices,v1)) if(!GETBIT(marked_vertices,v1))
{ {
@ -585,7 +588,9 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint
base_rank += bdz_lookup_table[*(g + beg_idx_b++)]; base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
} }
DEBUGP("base rank %u\n", base_rank);
beg_idx_v = beg_idx_b << 2; beg_idx_v = beg_idx_b << 2;
DEBUGP("beg_idx_v %u\n", beg_idx_v);
while(beg_idx_v < vertex) while(beg_idx_v < vertex)
{ {
if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++; if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
@ -605,6 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
hl[1] = hl[1] % bdz->r + bdz->r; hl[1] = hl[1] % bdz->r + bdz->r;
hl[2] = hl[2] % bdz->r + (bdz->r << 1); hl[2] = hl[2] % bdz->r + (bdz->r << 1);
vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3]; vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
DEBUGP("Search found vertex %u\n", vertex);
return rank(bdz->b, bdz->ranktable, bdz->g, vertex); return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
} }

View File

@ -7,7 +7,7 @@
#include <limits.h> #include <limits.h>
#include <string.h> #include <string.h>
//#define DEBUG #define DEBUG
#include "debug.h" #include "debug.h"
#define hashsize(n) ((cmph_uint32)1<<(n)) #define hashsize(n) ((cmph_uint32)1<<(n))
@ -87,8 +87,8 @@ acceptable. Do NOT use for cryptographic purposes.
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
{ {
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
DEBUGP("Initializing jenkins hash\n");
state->seed = ((cmph_uint32)rand() % size); state->seed = ((cmph_uint32)rand() % size);
DEBUGP("Initializied jenkins hash with seed %d\n", state->seed);
return state; return state;
} }
void jenkins_state_destroy(jenkins_state_t *state) void jenkins_state_destroy(jenkins_state_t *state)