From 22d149d3a8d362391df3c3e7f1a6e81c65203861 Mon Sep 17 00:00:00 2001 From: Davi de Castro Reis Date: Wed, 27 Oct 2010 19:45:43 -0700 Subject: [PATCH] It works. --- cxxmph/mphtable.cc | 54 ++++++++++++++++++++++++++++------- cxxmph/mphtable.h | 11 ++++--- cxxmph/mphtable_test.cc | 2 ++ cxxmph/randomly_seeded_hash.h | 17 +++++++++++ src/bdz.c | 10 +++++-- src/jenkins_hash.c | 4 +-- 6 files changed, 80 insertions(+), 18 deletions(-) diff --git a/cxxmph/mphtable.cc b/cxxmph/mphtable.cc index 88ab6ed..6f6a788 100644 --- a/cxxmph/mphtable.cc +++ b/cxxmph/mphtable.cc @@ -36,7 +36,7 @@ static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; void set_2bit_value(vector *d, cmph_uint8 i, cmph_uint8 v) { (*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3]; } -cmph_uint8 get_2bit_value(const vector& d, cmph_uint8 i) { +cmph_uint32 get_2bit_value(const vector& d, cmph_uint8 i) { return (d[(i >> 2)] >> ((i & 3) << 1)) & 3; } @@ -50,7 +50,7 @@ bool MPHTable::GenerateQueue( cmph_uint32 nedges = m_; cmph_uint32 nvertices = n_; // Relies on vector using 1 bit per element - vector marked_edge((nedges >> 3) + 1, false); + vector marked_edge(nedges + 1, false); vector queue(nvertices, 0); for (cmph_uint32 i = 0; i < nedges; ++i) { const TriGraph::Edge& e = graph->edges()[i]; @@ -63,6 +63,15 @@ bool MPHTable::GenerateQueue( } } } + for (unsigned int i = 0; i < marked_edge.size(); ++i) { + cerr << "vertex with degree " << static_cast(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl; + } + for (unsigned int i = 0; i < queue.size(); ++i) { + cerr << "vertex " << i << " queued at " << queue[i] << endl; + } + // At this point queue head is the number of edges touching at least one + // vertex of degree 1. + cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl; while (queue_tail != queue_head) { cmph_uint32 current_edge = queue[queue_tail++]; graph->RemoveEdge(current_edge); @@ -78,6 +87,9 @@ bool MPHTable::GenerateQueue( } } } + for (unsigned int i = 0; i < queue.size(); ++i) { + cerr << "vertex " << i << " queued at " << queue[i] << endl; + } int cycles = queue_head - nedges; if (cycles == 0) queue.swap(*queue_output); return cycles == 0; @@ -85,14 +97,21 @@ bool MPHTable::GenerateQueue( void MPHTable::Assigning( const vector& edges, const vector& queue) { - cmph_uint32 nedges = n_; + cmph_uint32 nedges = m_; cmph_uint32 current_edge = 0; vector marked_vertices(nedges + 1); // Initialize vector of half nibbles with all bits set. - vector(nedges, std::numeric_limits::max()).swap(g_); + cmph_uint32 sizeg = static_cast(ceil(n_/4.0)); + vector(sizeg, std::numeric_limits::max()).swap(g_); + for (int i = nedges - 1; i + 1 >= 1; --i) { current_edge = queue[i]; + cerr << "Current edge " << current_edge << " at queue pos " << i << endl; const TriGraph::Edge& e = edges[current_edge]; + cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> " + << get_2bit_value(g_, e[0]) << " " + << get_2bit_value(g_, e[1]) << " " + << get_2bit_value(g_, e[2]) << " " << endl; if (!marked_vertices[e[0]]) { if (!marked_vertices[e[1]]) { set_2bit_value(&g_, e[1], kUnassigned); @@ -115,6 +134,10 @@ void MPHTable::Assigning( set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3); marked_vertices[e[2]] = true; } + cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> " + << get_2bit_value(g_, e[0]) << " " + << get_2bit_value(g_, e[1]) << " " + << get_2bit_value(g_, e[2]) << " " << endl; } } @@ -128,7 +151,7 @@ void MPHTable::Ranking() { vector (ranktablesize).swap(ranktable_);; cmph_uint32 offset = 0; cmph_uint32 count = 0; - cmph_uint32 i = 0; + cmph_uint32 i = 1; while (1) { if (i == ranktable_.size()) break; cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total; @@ -142,11 +165,13 @@ void MPHTable::Ranking() { cmph_uint32 MPHTable::Search(const key_type& key) const { cmph_uint32 h[3]; - for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key); + // for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key); + hash_function_[0](key, h); h[0] = h[0] % r_; h[1] = h[1] % r_ + r_; h[2] = h[2] % r_ + (r_ << 1); - cmph_uint32 vertex = h[(g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3]; + cmph_uint32 vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3]; + cerr << "Search found vertex " << vertex << endl; return Rank(vertex); } @@ -154,14 +179,23 @@ cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const { cmph_uint32 index = vertex >> b_; cmph_uint32 base_rank = ranktable_[index]; cmph_uint32 beg_idx_v = index << b_; - cmph_uint32 beg_idx_b = index >> 2; - cmph_uint32 end_idx_b = index >> 2; + cmph_uint32 beg_idx_b = beg_idx_v >> 2; + cmph_uint32 end_idx_b = vertex >> 2; while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]]; beg_idx_v = beg_idx_b << 2; + cerr << "beg_idx_v: " << beg_idx_v << endl; + cerr << "base rank: " << base_rank << endl; + + cerr << "G: "; + for (unsigned int i = 0; i < n_; ++i) { + cerr << get_2bit_value(g_, i) << " "; + } while (beg_idx_v < vertex) { - if (g_[beg_idx_v] != kUnassigned) ++base_rank; + cerr << get_2bit_value(g_, beg_idx_v) << " "; + if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank; ++beg_idx_v; } + cerr << "Base rank: " << base_rank << endl; return base_rank; } diff --git a/cxxmph/mphtable.h b/cxxmph/mphtable.h index c0ef402..84d56df 100644 --- a/cxxmph/mphtable.h +++ b/cxxmph/mphtable.h @@ -22,7 +22,7 @@ class MPHTable { // This class could be a template for both key type and hash function, but we // chose to go with simplicity. typedef StringPiece key_type; - typedef RandomlySeededHashFunction hasher_type; + typedef RandomlySeededHashFunction hasher_type; MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { } ~MPHTable() {} @@ -82,7 +82,9 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) { std::vector queue; while (1) { cerr << "Iterations missing: " << iterations << endl; - for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type(); + // for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type(); + hash_function_[0] = hasher_type(); + cerr << "Seed: " << hash_function_[0].seed << endl; if (Mapping(begin, end, &edges, &queue)) break; else --iterations; if (iterations == 0) break; @@ -101,11 +103,12 @@ bool MPHTable::Mapping( TriGraph graph(n_, m_); for (ForwardIterator it = begin; it != end; ++it) { cmph_uint32 h[3]; - for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it); + // for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it); + hash_function_[0](*it, h); cmph_uint32 v0 = h[0] % r_; cmph_uint32 v1 = h[1] % r_ + r_; cmph_uint32 v2 = h[2] % r_ + (r_ << 1); - cerr << "Key: " << *it << " vertex " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl; + cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl; graph.AddEdge(TriGraph::Edge(v0, v1, v2)); } if (GenerateQueue(&graph, queue)) { diff --git a/cxxmph/mphtable_test.cc b/cxxmph/mphtable_test.cc index b08ffc5..8986ee0 100644 --- a/cxxmph/mphtable_test.cc +++ b/cxxmph/mphtable_test.cc @@ -9,6 +9,8 @@ using std::vector; using cxxmph::MPHTable; int main(int argc, char** argv) { + + srand(1); vector keys; keys.push_back("davi"); keys.push_back("paulo"); diff --git a/cxxmph/randomly_seeded_hash.h b/cxxmph/randomly_seeded_hash.h index fa382dd..60ab32d 100644 --- a/cxxmph/randomly_seeded_hash.h +++ b/cxxmph/randomly_seeded_hash.h @@ -8,6 +8,7 @@ #include "../src/cmph_types.h" #include "MurmurHash2.h" +#include "jenkins_hash.h" #include "stringpiece.h" namespace cxxmph { @@ -15,9 +16,25 @@ namespace cxxmph { template struct RandomlySeededHashFunction { }; +class JenkinsStringPiece { }; class Murmur2StringPiece { }; class Murmur2Pod { }; +template <> +struct RandomlySeededHashFunction { + RandomlySeededHashFunction() { + srand(1); + seed = 4; + } + cmph_uint32 operator()(const StringPiece& key) const { + return jenkins_hash(key.data(), key.length(), seed); + } + void operator()(const StringPiece& key, cmph_uint32* hashes) const { + __jenkins_hash_vector(seed, key.data(), key.length(), hashes); + } + cmph_uint32 seed; +}; + template <> struct RandomlySeededHashFunction { RandomlySeededHashFunction() : seed(random()) { } diff --git a/src/bdz.c b/src/bdz.c index f422c8f..5dce597 100755 --- a/src/bdz.c +++ b/src/bdz.c @@ -9,7 +9,7 @@ #include #include #include -//#define DEBUG +#define DEBUG #include "debug.h" #define UNASSIGNED 3U #define NULL_EDGE 0xffffffff @@ -177,9 +177,11 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que } }; }; + DEBUGP("Queue head %d Queue tail %d\n", queue_head, queue_tail); while(queue_tail!=queue_head){ curr_edge=queue[queue_tail++]; bdz_remove_edge(graph3,curr_edge); + DEBUGP("Removing edge %d\n", curr_edge); v0=graph3->edges[curr_edge].vertices[0]; v1=graph3->edges[curr_edge].vertices[1]; v2=graph3->edges[curr_edge].vertices[2]; @@ -403,6 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que h0 = hl[0] % bdz->r; h1 = hl[1] % bdz->r + bdz->r; h2 = hl[2] % bdz->r + (bdz->r << 1); + DEBUGP("Key: %s (%u %u %u)\n", key, h0, h1, h2); mph->key_source->dispose(mph->key_source->data, key, keylen); bdz_add_edge(graph3,h0,h1,h2); } @@ -427,7 +430,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t v0=graph3->edges[curr_edge].vertices[0]; v1=graph3->edges[curr_edge].vertices[1]; v2=graph3->edges[curr_edge].vertices[2]; - DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2)); + DEBUGP("B:%u %u %u -- %u %u %u edge %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2), curr_edge); if(!GETBIT(marked_vertices, v0)){ if(!GETBIT(marked_vertices,v1)) { @@ -585,7 +588,9 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint base_rank += bdz_lookup_table[*(g + beg_idx_b++)]; } + DEBUGP("base rank %u\n", base_rank); beg_idx_v = beg_idx_b << 2; + DEBUGP("beg_idx_v %u\n", beg_idx_v); while(beg_idx_v < vertex) { if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++; @@ -605,6 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) hl[1] = hl[1] % bdz->r + bdz->r; hl[2] = hl[2] % bdz->r + (bdz->r << 1); vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3]; + DEBUGP("Search found vertex %u\n", vertex); return rank(bdz->b, bdz->ranktable, bdz->g, vertex); } diff --git a/src/jenkins_hash.c b/src/jenkins_hash.c index f5233a5..4697f74 100644 --- a/src/jenkins_hash.c +++ b/src/jenkins_hash.c @@ -7,7 +7,7 @@ #include #include -//#define DEBUG +#define DEBUG #include "debug.h" #define hashsize(n) ((cmph_uint32)1<<(n)) @@ -87,8 +87,8 @@ acceptable. Do NOT use for cryptographic purposes. jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table { jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t)); - DEBUGP("Initializing jenkins hash\n"); state->seed = ((cmph_uint32)rand() % size); + DEBUGP("Initializied jenkins hash with seed %d\n", state->seed); return state; } void jenkins_state_destroy(jenkins_state_t *state)