It works.
This commit is contained in:
parent
385ce27a10
commit
22d149d3a8
@ -36,7 +36,7 @@ static const cmph_uint8 valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
|
||||
// Writes the 2-bit value v into slot i of the packed array *d. Four slots share
// one byte: slot i lives in byte (i >> 2) at bit offset (i & 3) * 2.
// The update is a bitwise AND with (v shifted into the slot) OR'ed with
// valuemask[i & 3] (all ones outside the slot), so other slots are untouched and
// the target slot becomes (old & v). It can only clear bits, so the slot must
// still hold 3 (both bits set) for v to be stored exactly.
// NOTE(review): i is declared cmph_uint8; if that is an 8-bit type, larger
// vertex indices passed by callers would be truncated -- confirm intended width.
void set_2bit_value(vector<cmph_uint8> *d, cmph_uint8 i, cmph_uint8 v) {
|
||||
(*d)[(i >> 2)] &= (v << ((i & 3) << 1)) | valuemask[i & 3];
|
||||
}
|
||||
// Reads the 2-bit value in slot i of the packed array d: takes byte (i >> 2),
// shifts it right by (i & 3) * 2 and masks the result with 3.
// NOTE(review): this span comes from a rendered diff. The two signature lines
// below appear to be the pre-change (cmph_uint8 return) and post-change
// (cmph_uint32 return) versions of the same line; only one belongs in the file.
// NOTE(review): i is declared cmph_uint8 -- see whether indices above 255 occur.
cmph_uint8 get_2bit_value(const vector<cmph_uint8>& d, cmph_uint8 i) {
|
||||
cmph_uint32 get_2bit_value(const vector<cmph_uint8>& d, cmph_uint8 i) {
|
||||
return (d[(i >> 2)] >> ((i & 3) << 1)) & 3;
|
||||
}
|
||||
|
||||
@ -50,7 +50,7 @@ bool MPHTable::GenerateQueue(
|
||||
cmph_uint32 nedges = m_;
|
||||
cmph_uint32 nvertices = n_;
|
||||
// Relies on vector<bool> using 1 bit per element
|
||||
vector<bool> marked_edge((nedges >> 3) + 1, false);
|
||||
vector<bool> marked_edge(nedges + 1, false);
|
||||
vector<cmph_uint32> queue(nvertices, 0);
|
||||
for (cmph_uint32 i = 0; i < nedges; ++i) {
|
||||
const TriGraph::Edge& e = graph->edges()[i];
|
||||
@ -63,6 +63,15 @@ bool MPHTable::GenerateQueue(
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned int i = 0; i < marked_edge.size(); ++i) {
|
||||
cerr << "vertex with degree " << static_cast<cmph_uint32>(graph->vertex_degree()[i]) << " marked " << marked_edge[i] << endl;
|
||||
}
|
||||
for (unsigned int i = 0; i < queue.size(); ++i) {
|
||||
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
||||
}
|
||||
// At this point queue head is the number of edges touching at least one
|
||||
// vertex of degree 1.
|
||||
cerr << "Queue head " << queue_head << " Queue tail " << queue_tail << endl;
|
||||
while (queue_tail != queue_head) {
|
||||
cmph_uint32 current_edge = queue[queue_tail++];
|
||||
graph->RemoveEdge(current_edge);
|
||||
@ -78,6 +87,9 @@ bool MPHTable::GenerateQueue(
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned int i = 0; i < queue.size(); ++i) {
|
||||
cerr << "vertex " << i << " queued at " << queue[i] << endl;
|
||||
}
|
||||
int cycles = queue_head - nedges;
|
||||
if (cycles == 0) queue.swap(*queue_output);
|
||||
return cycles == 0;
|
||||
@ -85,14 +97,21 @@ bool MPHTable::GenerateQueue(
|
||||
|
||||
void MPHTable::Assigning(
|
||||
const vector<TriGraph::Edge>& edges, const vector<cmph_uint32>& queue) {
|
||||
cmph_uint32 nedges = n_;
|
||||
cmph_uint32 nedges = m_;
|
||||
cmph_uint32 current_edge = 0;
|
||||
vector<bool> marked_vertices(nedges + 1);
|
||||
// Initialize vector of half nibbles with all bits set.
|
||||
vector<cmph_uint8>(nedges, std::numeric_limits<cmph_uint8>::max()).swap(g_);
|
||||
cmph_uint32 sizeg = static_cast<cmph_uint32>(ceil(n_/4.0));
|
||||
vector<cmph_uint8>(sizeg, std::numeric_limits<cmph_uint8>::max()).swap(g_);
|
||||
|
||||
for (int i = nedges - 1; i + 1 >= 1; --i) {
|
||||
current_edge = queue[i];
|
||||
cerr << "Current edge " << current_edge << " at queue pos " << i << endl;
|
||||
const TriGraph::Edge& e = edges[current_edge];
|
||||
cerr << "B: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||
<< get_2bit_value(g_, e[0]) << " "
|
||||
<< get_2bit_value(g_, e[1]) << " "
|
||||
<< get_2bit_value(g_, e[2]) << " " << endl;
|
||||
if (!marked_vertices[e[0]]) {
|
||||
if (!marked_vertices[e[1]]) {
|
||||
set_2bit_value(&g_, e[1], kUnassigned);
|
||||
@ -115,6 +134,10 @@ void MPHTable::Assigning(
|
||||
set_2bit_value(&g_, e[2], (8 - (get_2bit_value(g_, e[0]) + get_2bit_value(g_, e[1]))) % 3);
|
||||
marked_vertices[e[2]] = true;
|
||||
}
|
||||
cerr << "A: " << e[0] << " " << e[1] << " " << e[2] << " -> "
|
||||
<< get_2bit_value(g_, e[0]) << " "
|
||||
<< get_2bit_value(g_, e[1]) << " "
|
||||
<< get_2bit_value(g_, e[2]) << " " << endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -128,7 +151,7 @@ void MPHTable::Ranking() {
|
||||
vector<cmph_uint32> (ranktablesize).swap(ranktable_);;
|
||||
cmph_uint32 offset = 0;
|
||||
cmph_uint32 count = 0;
|
||||
cmph_uint32 i = 0;
|
||||
cmph_uint32 i = 1;
|
||||
while (1) {
|
||||
if (i == ranktable_.size()) break;
|
||||
cmph_uint32 nbytes = size < nbytes_total ? size : nbytes_total;
|
||||
@ -142,11 +165,13 @@ void MPHTable::Ranking() {
|
||||
|
||||
// Looks up key: computes three hash values, maps each into its own third of the
// vertex range, selects one of the three vertices from the 2-bit g_ entries, and
// returns Rank() of that vertex.
// NOTE(review): this span comes from a rendered diff, so replaced lines appear
// next to their replacements (see the notes on the individual lines).
cmph_uint32 MPHTable::Search(const key_type& key) const {
|
||||
cmph_uint32 h[3];
|
||||
// NOTE(review): appears to be the pre-change line (one hasher call per slot);
// the commented-out copy and the single vector call below replace it.
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
|
||||
// for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](key);
|
||||
// One call on hasher 0 fills h; h[0], h[1] and h[2] are all read below.
hash_function_[0](key, h);
|
||||
// h[0] lands in [0, r_), h[1] in [r_, 2*r_), h[2] in [2*r_, 3*r_).
h[0] = h[0] % r_;
|
||||
h[1] = h[1] % r_ + r_;
|
||||
h[2] = h[2] % r_ + (r_ << 1);
|
||||
// NOTE(review): appears to be the pre-change line, indexing g_ one byte per vertex.
cmph_uint32 vertex = h[(g_[h[0]] + g_[h[1]] + g_[h[2]]) % 3];
|
||||
// Sum of the three packed 2-bit values, modulo 3, picks which vertex to use.
// NOTE(review): get_2bit_value declares its index as cmph_uint8 while h[] is
// cmph_uint32 -- confirm no truncation for larger vertex numbers.
cmph_uint32 vertex = h[(get_2bit_value(g_, h[0]) + get_2bit_value(g_, h[1]) + get_2bit_value(g_, h[2])) % 3];
|
||||
// Debug trace written to cerr on every lookup.
cerr << "Search found vertex " << vertex << endl;
|
||||
return Rank(vertex);
|
||||
}
|
||||
|
||||
@ -154,14 +179,23 @@ cmph_uint32 MPHTable::Rank(cmph_uint32 vertex) const {
|
||||
cmph_uint32 index = vertex >> b_;
|
||||
cmph_uint32 base_rank = ranktable_[index];
|
||||
cmph_uint32 beg_idx_v = index << b_;
|
||||
cmph_uint32 beg_idx_b = index >> 2;
|
||||
cmph_uint32 end_idx_b = index >> 2;
|
||||
cmph_uint32 beg_idx_b = beg_idx_v >> 2;
|
||||
cmph_uint32 end_idx_b = vertex >> 2;
|
||||
while (beg_idx_b < end_idx_b) base_rank += kBdzLookupTable[g_[beg_idx_b++]];
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
cerr << "beg_idx_v: " << beg_idx_v << endl;
|
||||
cerr << "base rank: " << base_rank << endl;
|
||||
|
||||
cerr << "G: ";
|
||||
for (unsigned int i = 0; i < n_; ++i) {
|
||||
cerr << get_2bit_value(g_, i) << " ";
|
||||
}
|
||||
while (beg_idx_v < vertex) {
|
||||
if (g_[beg_idx_v] != kUnassigned) ++base_rank;
|
||||
cerr << get_2bit_value(g_, beg_idx_v) << " ";
|
||||
if (get_2bit_value(g_, beg_idx_v) != kUnassigned) ++base_rank;
|
||||
++beg_idx_v;
|
||||
}
|
||||
cerr << "Base rank: " << base_rank << endl;
|
||||
return base_rank;
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,7 @@ class MPHTable {
|
||||
// This class could be a template for both key type and hash function, but we
|
||||
// chose to go with simplicity.
|
||||
typedef StringPiece key_type;
|
||||
typedef RandomlySeededHashFunction<Murmur2StringPiece> hasher_type;
|
||||
typedef RandomlySeededHashFunction<JenkinsStringPiece> hasher_type;
|
||||
|
||||
MPHTable(double c = 1.23, cmph_uint8 b = 7) : c_(c), b_(b) { }
|
||||
~MPHTable() {}
|
||||
@ -82,7 +82,9 @@ bool MPHTable::Reset(ForwardIterator begin, ForwardIterator end) {
|
||||
std::vector<cmph_uint32> queue;
|
||||
while (1) {
|
||||
cerr << "Iterations missing: " << iterations << endl;
|
||||
for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type();
|
||||
// for (int i = 0; i < 3; ++i) hash_function_[i] = hasher_type();
|
||||
hash_function_[0] = hasher_type();
|
||||
cerr << "Seed: " << hash_function_[0].seed << endl;
|
||||
if (Mapping(begin, end, &edges, &queue)) break;
|
||||
else --iterations;
|
||||
if (iterations == 0) break;
|
||||
@ -101,11 +103,12 @@ bool MPHTable::Mapping(
|
||||
TriGraph graph(n_, m_);
|
||||
for (ForwardIterator it = begin; it != end; ++it) {
|
||||
cmph_uint32 h[3];
|
||||
for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
|
||||
// for (int i = 0; i < 3; ++i) h[i] = hash_function_[i](*it);
|
||||
hash_function_[0](*it, h);
|
||||
cmph_uint32 v0 = h[0] % r_;
|
||||
cmph_uint32 v1 = h[1] % r_ + r_;
|
||||
cmph_uint32 v2 = h[2] % r_ + (r_ << 1);
|
||||
cerr << "Key: " << *it << " vertex " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
||||
cerr << "Key: " << *it << " edge " << it - begin << " (" << v0 << "," << v1 << "," << v2 << ")" << endl;
|
||||
graph.AddEdge(TriGraph::Edge(v0, v1, v2));
|
||||
}
|
||||
if (GenerateQueue(&graph, queue)) {
|
||||
|
@ -9,6 +9,8 @@ using std::vector;
|
||||
using cxxmph::MPHTable;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
srand(1);
|
||||
vector<string> keys;
|
||||
keys.push_back("davi");
|
||||
keys.push_back("paulo");
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "../src/cmph_types.h"
|
||||
#include "MurmurHash2.h"
|
||||
#include "jenkins_hash.h"
|
||||
#include "stringpiece.h"
|
||||
|
||||
namespace cxxmph {
|
||||
@ -15,9 +16,25 @@ namespace cxxmph {
|
||||
// Primary template: intentionally empty. The usable functors are the explicit
// specializations declared for the tag types below.
template <class HashFun>
|
||||
struct RandomlySeededHashFunction { };
|
||||
|
||||
// Empty tag classes, used only as template arguments that select a
// RandomlySeededHashFunction specialization.
class JenkinsStringPiece { };
|
||||
class Murmur2StringPiece { };
|
||||
class Murmur2Pod { };
|
||||
|
||||
// Jenkins-hash functor over StringPiece keys, holding the seed it hashes with.
template <>
|
||||
struct RandomlySeededHashFunction<JenkinsStringPiece> {
|
||||
// NOTE(review): despite the type name, the seed is fixed at 4 and the global
// rand() state is reset with srand(1) on every construction -- presumably a
// temporary debugging aid for reproducible runs; confirm before relying on it.
RandomlySeededHashFunction() {
|
||||
srand(1);
|
||||
seed = 4;
|
||||
}
|
||||
// Single-value form: returns jenkins_hash over the key bytes with this seed.
cmph_uint32 operator()(const StringPiece& key) const {
|
||||
return jenkins_hash(key.data(), key.length(), seed);
|
||||
}
|
||||
// Vector form: forwards to __jenkins_hash_vector, which writes into hashes.
// Callers in this change pass a cmph_uint32[3] and read all three entries.
void operator()(const StringPiece& key, cmph_uint32* hashes) const {
|
||||
__jenkins_hash_vector(seed, key.data(), key.length(), hashes);
|
||||
}
|
||||
// Seed handed to the Jenkins routines; public, and read directly by callers.
cmph_uint32 seed;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct RandomlySeededHashFunction<Murmur2StringPiece> {
|
||||
RandomlySeededHashFunction() : seed(random()) { }
|
||||
|
10
src/bdz.c
10
src/bdz.c
@ -9,7 +9,7 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
//#define DEBUG
|
||||
#define DEBUG
|
||||
#include "debug.h"
|
||||
#define UNASSIGNED 3U
|
||||
#define NULL_EDGE 0xffffffff
|
||||
@ -177,9 +177,11 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
|
||||
}
|
||||
};
|
||||
};
|
||||
DEBUGP("Queue head %d Queue tail %d\n", queue_head, queue_tail);
|
||||
while(queue_tail!=queue_head){
|
||||
curr_edge=queue[queue_tail++];
|
||||
bdz_remove_edge(graph3,curr_edge);
|
||||
DEBUGP("Removing edge %d\n", curr_edge);
|
||||
v0=graph3->edges[curr_edge].vertices[0];
|
||||
v1=graph3->edges[curr_edge].vertices[1];
|
||||
v2=graph3->edges[curr_edge].vertices[2];
|
||||
@ -403,6 +405,7 @@ static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t que
|
||||
h0 = hl[0] % bdz->r;
|
||||
h1 = hl[1] % bdz->r + bdz->r;
|
||||
h2 = hl[2] % bdz->r + (bdz->r << 1);
|
||||
DEBUGP("Key: %s (%u %u %u)\n", key, h0, h1, h2);
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
bdz_add_edge(graph3,h0,h1,h2);
|
||||
}
|
||||
@ -427,7 +430,7 @@ static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t
|
||||
v0=graph3->edges[curr_edge].vertices[0];
|
||||
v1=graph3->edges[curr_edge].vertices[1];
|
||||
v2=graph3->edges[curr_edge].vertices[2];
|
||||
DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
|
||||
DEBUGP("B:%u %u %u -- %u %u %u edge %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2), curr_edge);
|
||||
if(!GETBIT(marked_vertices, v0)){
|
||||
if(!GETBIT(marked_vertices,v1))
|
||||
{
|
||||
@ -585,7 +588,9 @@ static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint
|
||||
base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
|
||||
|
||||
}
|
||||
DEBUGP("base rank %u\n", base_rank);
|
||||
beg_idx_v = beg_idx_b << 2;
|
||||
DEBUGP("beg_idx_v %u\n", beg_idx_v);
|
||||
while(beg_idx_v < vertex)
|
||||
{
|
||||
if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
|
||||
@ -605,6 +610,7 @@ cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
hl[1] = hl[1] % bdz->r + bdz->r;
|
||||
hl[2] = hl[2] % bdz->r + (bdz->r << 1);
|
||||
vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
|
||||
DEBUGP("Search found vertex %u\n", vertex);
|
||||
return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
//#define DEBUG
|
||||
#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
#define hashsize(n) ((cmph_uint32)1<<(n))
|
||||
@ -87,8 +87,8 @@ acceptable. Do NOT use for cryptographic purposes.
|
||||
// Allocates a jenkins_state_t with malloc and sets its seed to rand() % size.
// Returns the new state.
// NOTE(review): the malloc result is dereferenced without a NULL check, and
// size == 0 would make the modulo below divide by zero -- confirm callers.
// NOTE(review): this span comes from a rendered diff; the two DEBUGP lines appear
// to be the pre-change and post-change versions of the same trace statement.
jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
|
||||
{
|
||||
jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
|
||||
DEBUGP("Initializing jenkins hash\n");
|
||||
state->seed = ((cmph_uint32)rand() % size);
|
||||
// NOTE(review): "Initializied" in the message below is a typo; it is runtime
// text, so it is left unchanged here.
DEBUGP("Initializied jenkins hash with seed %d\n", state->seed);
|
||||
return state;
|
||||
}
|
||||
void jenkins_state_destroy(jenkins_state_t *state)
|
||||
|
Loading…
Reference in New Issue
Block a user