From 676d34073c01617f1ffb1d2c0094b7ea9baddee1 Mon Sep 17 00:00:00 2001 From: davi Date: Mon, 8 Nov 2010 22:02:18 -0200 Subject: [PATCH] Fixed first_edge initialization bug. --- cxxmph/cmph_hash_map.h | 6 ++- cxxmph/mphtable.cc | 1 + cxxmph/mphtable.h | 6 +++ cxxmph/trigraph.cc | 14 +++++- cxxmph/trigraph.h | 1 + src/Makefile.am | 2 +- src/bdz.c | 1 + src/jenkins_hash.c | 110 +++++++++++++++++++++-------------------- 8 files changed, 84 insertions(+), 57 deletions(-) diff --git a/cxxmph/cmph_hash_map.h b/cxxmph/cmph_hash_map.h index 871d4b1..12e98a0 100644 --- a/cxxmph/cmph_hash_map.h +++ b/cxxmph/cmph_hash_map.h @@ -110,8 +110,10 @@ CMPH_METHOD_DECL(void_type, rehash)() { << slack_.size() << " keys in slack " << values_.size() << " key in total" << std::endl; slack_type().swap(slack_); - table_.Reset(make_iterator_first(values_.begin()), - make_iterator_first(values_.end())); + bool success = table_.Reset( + make_iterator_first(values_.begin()), + make_iterator_first(values_.end())); + assert(success); std::vector new_values(values_.size()); for (const_iterator it = values_.begin(), end = values_.end(); it != end; ++it) { diff --git a/cxxmph/mphtable.cc b/cxxmph/mphtable.cc index d3537a9..669df06 100644 --- a/cxxmph/mphtable.cc +++ b/cxxmph/mphtable.cc @@ -69,6 +69,7 @@ bool MPHTable::GenerateQueue( // At this point queue head is the number of edges touching at least one // vertex of degree 1. cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl; + graph->DebugGraph(); while (queue_tail != queue_head) { cmph_uint32 current_edge = queue[queue_tail++]; graph->RemoveEdge(current_edge); diff --git a/cxxmph/mphtable.h b/cxxmph/mphtable.h index ce2517e..46726b6 100644 --- a/cxxmph/mphtable.h +++ b/cxxmph/mphtable.h @@ -3,6 +3,7 @@ // Minimal perfect hash abstraction implementing the BDZ algorithm +#include #include #include // for std::hash #include @@ -129,6 +130,11 @@ cmph_uint32 MPHTable::index(const Key& key) const { h[0] = h[0] % r_; h[1] = h[1] % r_ + r_; h[2] = h[2] % r_ + (r_ << 1); + assert(g_.size()); + cerr << "g_.size() " << g_.size() << " h0 >> 2 " << (h[0] >> 2) << endl; + assert((h[0] >> 2) > 2) > 2) ::max(); +static const cmph_uint32 kInvalidEdge = std::numeric_limits::max(); } namespace cxxmph { @@ -65,5 +65,17 @@ void TriGraph::RemoveEdge(cmph_uint32 current_edge) { --vertex_degree_[vertex]; } } + +void TriGraph::DebugGraph() const { + int i; + for(i = 0; i < edges_.size(); i++){ + cerr << i << " " << edges_[i][0] << " " << edges_[i][1] << " " << edges_[i][2] + << " nexts " << next_edge_[i][0] << " " << next_edge_[i][1] << " " << next_edge_[i][2] << endl; + } + for(i = 0; i < first_edge_.size();i++){ + cerr << "first for vertice " <* edges); + void DebugGraph() const; const std::vector& edges() const { return edges_; } const std::vector& vertex_degree() const { return vertex_degree_; } diff --git a/src/Makefile.am b/src/Makefile.am index f3896dc..40734e4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,7 +2,7 @@ bin_PROGRAMS = cmph lib_LTLIBRARIES = libcmph.la include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h libcmph_la_SOURCES = hash.h hash.c \ - jenkins_hash.h jenkins_hash.c\ + jenkins_hash.h jenkins_hash.c MurmurHash2.h\ hash_state.h debug.h \ vstack.h vstack.c vqueue.h vqueue.c\ graph.h graph.c bitbool.h \ diff --git a/src/bdz.c b/src/bdz.c index 5dce597..059c281 100755 --- a/src/bdz.c +++ b/src/bdz.c @@ -178,6 +178,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que }; }; DEBUGP("Queue head %d Queue tail %d\n", queue_head, queue_tail); + bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4); while(queue_tail!=queue_head){ curr_edge=queue[queue_tail++]; bdz_remove_edge(graph3,curr_edge); diff --git a/src/jenkins_hash.c b/src/jenkins_hash.c index 4697f74..5d4e807 100644 --- a/src/jenkins_hash.c +++ b/src/jenkins_hash.c @@ -9,6 +9,7 @@ #define DEBUG #include "debug.h" +#include "MurmurHash2.h" #define hashsize(n) ((cmph_uint32)1<<(n)) #define hashmask(n) (hashsize(n)-1) @@ -99,63 +100,67 @@ void jenkins_state_destroy(jenkins_state_t *state) inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) { - register cmph_uint32 len, length; - - /* Set up the internal state */ - length = keylen; - len = length; - hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */ - hashes[2] = seed; /* the previous hash value - seed in our case */ - - /*---------------------------------------- handle most of the key */ - while (len >= 12) - { - hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); - hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24)); - hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24)); - mix(hashes[0],hashes[1],hashes[2]); - k += 12; len -= 12; + int i; + for (i = 0; i < 3; ++i) { + hashes[i] = MurmurHash2(k, keylen, seed + i); } - - /*------------------------------------- handle the last 11 bytes */ - hashes[2] += length; - switch(len) /* all the case statements fall through */ - { - case 11: - hashes[2] +=((cmph_uint32)k[10]<<24); - case 10: - hashes[2] +=((cmph_uint32)k[9]<<16); - case 9 : - hashes[2] +=((cmph_uint32)k[8]<<8); - /* the first byte of hashes[2] is reserved for the length */ - case 8 : - hashes[1] +=((cmph_uint32)k[7]<<24); - case 7 : - hashes[1] +=((cmph_uint32)k[6]<<16); - case 6 : - hashes[1] +=((cmph_uint32)k[5]<<8); - case 5 : - hashes[1] +=(cmph_uint8) k[4]; - case 4 : - hashes[0] +=((cmph_uint32)k[3]<<24); - case 3 : - hashes[0] +=((cmph_uint32)k[2]<<16); - case 2 : - hashes[0] +=((cmph_uint32)k[1]<<8); - case 1 : - hashes[0] +=(cmph_uint8)k[0]; - /* case 0: nothing left to add */ - } - - mix(hashes[0],hashes[1],hashes[2]); +// register cmph_uint32 len, length; +// +// /* Set up the internal state */ +// length = keylen; +// len = length; +// hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */ +// hashes[2] = seed; /* the previous hash value - seed in our case */ +// +// /*---------------------------------------- handle most of the key */ +// while (len >= 12) +// { +// hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24)); +// hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24)); +// hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24)); +// mix(hashes[0],hashes[1],hashes[2]); +// k += 12; len -= 12; +// } +// +// /*------------------------------------- handle the last 11 bytes */ +// hashes[2] += length; +// switch(len) /* all the case statements fall through */ +// { +// case 11: +// hashes[2] +=((cmph_uint32)k[10]<<24); +// case 10: +// hashes[2] +=((cmph_uint32)k[9]<<16); +// case 9 : +// hashes[2] +=((cmph_uint32)k[8]<<8); +// /* the first byte of hashes[2] is reserved for the length */ +// case 8 : +// hashes[1] +=((cmph_uint32)k[7]<<24); +// case 7 : +// hashes[1] +=((cmph_uint32)k[6]<<16); +// case 6 : +// hashes[1] +=((cmph_uint32)k[5]<<8); +// case 5 : +// hashes[1] +=(cmph_uint8) k[4]; +// case 4 : +// hashes[0] +=((cmph_uint32)k[3]<<24); +// case 3 : +// hashes[0] +=((cmph_uint32)k[2]<<16); +// case 2 : +// hashes[0] +=((cmph_uint32)k[1]<<8); +// case 1 : +// hashes[0] +=(cmph_uint8)k[0]; +// /* case 0: nothing left to add */ +// } +// +// mix(hashes[0],hashes[1],hashes[2]); } cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen) { - cmph_uint32 hashes[3]; - __jenkins_hash_vector(state->seed, k, keylen, hashes); - return hashes[2]; -/* cmph_uint32 a, b, c; +// cmph_uint32 hashes[3]; +// __jenkins_hash_vector(state->seed, k, keylen, hashes); +// return hashes[2]; + cmph_uint32 a, b, c; cmph_uint32 len, length; // Set up the internal state @@ -209,7 +214,6 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl /// report the result return c; - */ } void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)