From 5b62735b395fa1c1536ac8379e34f1d743efca6f Mon Sep 17 00:00:00 2001 From: fc_botelho Date: Sat, 29 Mar 2008 01:48:15 +0000 Subject: [PATCH] *** empty log message *** --- src/bdz.c | 51 +++++---- src/bdz_ph.c | 31 +++-- src/bmz.c | 57 +++++---- src/bmz8.c | 52 ++++++--- src/brz.c | 293 +++++++++++++++++++++++++++++++++++++++++++---- src/chm.c | 54 +++++---- src/cmph.c | 55 ++++----- src/cmph_types.h | 22 ++++ src/fch.c | 85 +++++++++++++- src/hash.c | 62 +++++----- src/hash.h | 37 +++--- src/main.c | 1 + 12 files changed, 611 insertions(+), 189 deletions(-) diff --git a/src/bdz.c b/src/bdz.c index 1318862..cae93a6 100755 --- a/src/bdz.c +++ b/src/bdz.c @@ -634,31 +634,34 @@ cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke void bdz_pack(cmph_t *mphf, void *packed_mphf) { bdz_data_t *data = (bdz_data_t *)mphf->data; - cmph_uint32 * ptr = packed_mphf; + cmph_uint8 * ptr = packed_mphf; + + // packing hl type + CMPH_HASH hl_type = hash_get_type(data->hl); + *((cmph_uint32 *) ptr) = hl_type; + ptr += sizeof(cmph_uint32); // packing hl hash_state_pack(data->hl, ptr); + ptr += hash_state_packed_size(hl_type); - - ptr += (hash_state_packed_size(data->hl) >> 2); // (hash_state_packed_size(data->hl) / 4); - // packing r - *ptr++ = data->r; + *((cmph_uint32 *) ptr) = data->r; + ptr += sizeof(data->r); // packing ranktablesize - *ptr++ = data->ranktablesize; + *((cmph_uint32 *) ptr) = data->ranktablesize; + ptr += sizeof(data->ranktablesize); // packing ranktable memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize)); - ptr += data->ranktablesize; + ptr += sizeof(cmph_uint32)*(data->ranktablesize); - cmph_uint8 * ptr8 = (cmph_uint8 *) ptr; - // packing b - *ptr8++ = data->b; + *ptr++ = data->b; // packing g - memcpy(ptr8, data->g, sizeof(cmph_uint8)*((data->n >> 2) +1)); + memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n >> 2) +1)); } /** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf); @@ -669,7 +672,10 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf) cmph_uint32 bdz_packed_size(cmph_t *mphf) { bdz_data_t *data = (bdz_data_t *)mphf->data; - return (sizeof(CMPH_ALGO) + hash_state_packed_size(data->hl) + (sizeof(cmph_uint32) << 1) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*((data->n >> 2) +1)); + + CMPH_HASH hl_type = hash_get_type(data->hl); + + return (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*((data->n >> 2) +1)); } /** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); @@ -681,21 +687,20 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf) */ cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - register cmph_uint32 vertex; - register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf; - register cmph_uint32 hl_size = *hl_ptr; - register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4 - - register cmph_uint32 r = *ptr++; - register cmph_uint32 ranktablesize = *ptr++; - register cmph_uint32 *ranktable = ptr; - ptr += ranktablesize; - register cmph_uint8 * g = (cmph_uint8 *)ptr; + register cmph_uint32 vertex; + register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; + register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf + 4); + + register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type)); + + register cmph_uint32 r = *ranktable++; + register cmph_uint32 ranktablesize = *ranktable++; + register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize); register cmph_uint8 b = *g++; cmph_uint32 hl[3]; - hash_vector_packed(hl_ptr, key, keylen, hl); + hash_vector_packed(hl_ptr, hl_type, key, keylen, hl); hl[0] = hl[0] % r; hl[1] = hl[1] % r + r; hl[2] = hl[2] % r + (r << 1); diff --git a/src/bdz_ph.c b/src/bdz_ph.c index 4a76de6..50bcafa 100755 --- a/src/bdz_ph.c +++ b/src/bdz_ph.c @@ -569,16 +569,20 @@ cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 void bdz_ph_pack(cmph_t *mphf, void *packed_mphf) { bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data; - cmph_uint32 * ptr = packed_mphf; + cmph_uint8 * ptr = packed_mphf; + + // packing hl type + CMPH_HASH hl_type = hash_get_type(data->hl); + *((cmph_uint32 *) ptr) = hl_type; + ptr += sizeof(cmph_uint32); // packing hl hash_state_pack(data->hl, ptr); - - - ptr += (hash_state_packed_size(data->hl) >> 2); // (hash_state_packed_size(data->hl) / 4); + ptr += hash_state_packed_size(hl_type); // packing r - *ptr++ = data->r; + *((cmph_uint32 *) ptr) = data->r; + ptr += sizeof(data->r); // packing g memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n/5)+1)); @@ -592,7 +596,9 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf) cmph_uint32 bdz_ph_packed_size(cmph_t *mphf) { bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data; - return (sizeof(CMPH_ALGO) + hash_state_packed_size(data->hl) + sizeof(cmph_uint32) + sizeof(cmph_uint8)*((data->n/5)+1)); + CMPH_HASH hl_type = hash_get_type(data->hl); + + return (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*((data->n/5)+1)); } /** cmph_uint32 bdz_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen); @@ -605,18 +611,19 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf) cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf; - register cmph_uint32 hl_size = *hl_ptr; - register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4 + register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf; + register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf + 4); + + register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type); - register cmph_uint32 r = *ptr++; - register cmph_uint8 * g = (cmph_uint8 *)ptr; + register cmph_uint32 r = *((cmph_uint32*) ptr); + register cmph_uint8 * g = ptr + 4; cmph_uint32 hl[3]; register cmph_uint8 byte0, byte1, byte2; register cmph_uint32 vertex; - hash_vector_packed(hl_ptr, key, keylen, hl); + hash_vector_packed(hl_ptr, hl_type, key, keylen, hl); hl[0] = hl[0] % r; hl[1] = hl[1] % r + r; diff --git a/src/bmz.c b/src/bmz.c index ebab981..9e48ed4 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -575,23 +575,34 @@ cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke */ void bmz_pack(cmph_t *mphf, void *packed_mphf) { + bmz_data_t *data = (bmz_data_t *)mphf->data; - cmph_uint32 * ptr = packed_mphf; - + cmph_uint8 * ptr = packed_mphf; + + // packing h1 type + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + *((cmph_uint32 *) ptr) = h1_type; + ptr += sizeof(cmph_uint32); + // packing h1 hash_state_pack(data->hashes[0], ptr); - - ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4); - + ptr += hash_state_packed_size(h1_type); + + // packing h2 type + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + *((cmph_uint32 *) ptr) = h2_type; + ptr += sizeof(cmph_uint32); + // packing h2 hash_state_pack(data->hashes[1], ptr); - ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4); + ptr += hash_state_packed_size(h2_type); // packing n - *ptr++ = data->n; - + *((cmph_uint32 *) ptr) = data->n; + ptr += sizeof(data->n); + // packing g - memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); + memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); } /** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf); @@ -602,7 +613,11 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf) cmph_uint32 bmz_packed_size(cmph_t *mphf) { bmz_data_t *data = (bmz_data_t *)mphf->data; - return (sizeof(CMPH_ALGO) + 2*hash_state_packed_size(data->hashes[0]) + sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + + return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); } /** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); @@ -614,20 +629,22 @@ cmph_uint32 bmz_packed_size(cmph_t *mphf) */ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf; - register cmph_uint32 h1_size = *h1_ptr; + register cmph_uint8 *h1_ptr = packed_mphf; + register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr); + h1_ptr += 4; - register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4 - register cmph_uint32 h2_size = *h2_ptr; + register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); + register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); + h2_ptr += 4; - register cmph_uint32 *g_ptr = h2_ptr + (h2_size >> 2); // h2_ptr + h2_size/4 + register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); register cmph_uint32 n = *g_ptr++; - register cmph_uint32 h1 = hash_packed(h1_ptr, key, keylen) % n; - register cmph_uint32 h2 = hash_packed(h2_ptr, key, keylen) % n; - + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; + DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); if (h1 == h2 && ++h2 > n) h2 = 0; - - return (g_ptr[h1] + g_ptr[h2]); + DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m); + return (g_ptr[h1] + g_ptr[h2]); } diff --git a/src/bmz8.c b/src/bmz8.c index 0979735..81f9f5f 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -586,22 +586,30 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf) { bmz8_data_t *data = (bmz8_data_t *)mphf->data; cmph_uint8 * ptr = packed_mphf; - + + // packing h1 type + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + *((cmph_uint32 *) ptr) = h1_type; + ptr += sizeof(cmph_uint32); + // packing h1 hash_state_pack(data->hashes[0], ptr); - - ptr += hash_state_packed_size(data->hashes[0]); - + ptr += hash_state_packed_size(h1_type); + + // packing h2 type + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + *((cmph_uint32 *) ptr) = h2_type; + ptr += sizeof(cmph_uint32); + // packing h2 hash_state_pack(data->hashes[1], ptr); - ptr += hash_state_packed_size(data->hashes[1]); + ptr += hash_state_packed_size(h2_type); // packing n *ptr++ = data->n; - + // packing g - memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n); - + memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n); } /** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf); @@ -612,7 +620,11 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf) cmph_uint32 bmz8_packed_size(cmph_t *mphf) { bmz8_data_t *data = (bmz8_data_t *)mphf->data; - return (sizeof(CMPH_ALGO) + 2*hash_state_packed_size(data->hashes[0]) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n); + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + + return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n); } /** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen); @@ -624,20 +636,22 @@ cmph_uint32 bmz8_packed_size(cmph_t *mphf) */ cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf; - register cmph_uint32 h1_size = *h1_ptr; + register cmph_uint8 *h1_ptr = packed_mphf; + register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr); + h1_ptr += 4; - register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4 - register cmph_uint32 h2_size = *h2_ptr; + register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); + register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); + h2_ptr += 4; - register cmph_uint8 *g_ptr = (cmph_uint8 *)(h2_ptr + (h2_size >> 2)); // h2_ptr + h2_size/4 + register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type); register cmph_uint8 n = *g_ptr++; - register cmph_uint8 h1 = hash_packed(h1_ptr, key, keylen) % n; - register cmph_uint8 h2 = hash_packed(h2_ptr, key, keylen) % n; - + register cmph_uint8 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; + register cmph_uint8 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; + DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); if (h1 == h2 && ++h2 > n) h2 = 0; - - return (g_ptr[h1] + g_ptr[h2]); + DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m); + return (g_ptr[h1] + g_ptr[h2]); } diff --git a/src/brz.c b/src/brz.c index 8e406ad..ef3d3ad 100755 --- a/src/brz.c +++ b/src/brz.c @@ -635,14 +635,19 @@ void brz_load(FILE *f, cmph_t *mphf) return; } -static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen) +static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) { - cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k; - cmph_uint32 m = brz->size[h0]; - cmph_uint32 n = ceil(brz->c * m); - cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n; - cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n; - cmph_uint8 mphf_bucket; + register cmph_uint32 h0; + + hash_vector(brz->h0, key, keylen, fingerprint); + h0 = fingerprint[2] % brz->k; + + register cmph_uint32 m = brz->size[h0]; + register cmph_uint32 n = ceil(brz->c * m); + register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n; + register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n; + register cmph_uint8 mphf_bucket; + if (h1 == h2 && ++h2 >= n) h2 = 0; mphf_bucket = brz->g[h0][h1] + brz->g[h0][h2]; DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0); @@ -651,16 +656,20 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 return (mphf_bucket + brz->offset[h0]); } -static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen) +static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) { - cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k; - cmph_uint32 m = brz->size[h0]; - cmph_uint32 b = fch_calc_b(brz->c, m); - cmph_float32 p1 = fch_calc_p1(m); - cmph_float32 p2 = fch_calc_p2(b); - cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m; - cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m; - cmph_uint8 mphf_bucket = 0; + register cmph_uint32 h0; + + hash_vector(brz->h0, key, keylen, fingerprint); + h0 = fingerprint[2] % brz->k; + + register cmph_uint32 m = brz->size[h0]; + register cmph_uint32 b = fch_calc_b(brz->c, m); + register cmph_float32 p1 = fch_calc_p1(m); + register cmph_float32 p2 = fch_calc_p2(b); + register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m; + register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m; + register cmph_uint8 mphf_bucket = 0; h1 = mixh10h11h12(b, p1, p2, h1); mphf_bucket = (h2 + brz->g[h0][h1]) % m; return (mphf_bucket + brz->offset[h0]); @@ -669,12 +678,13 @@ static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) { brz_data_t *brz = mphf->data; + cmph_uint32 fingerprint[3]; switch(brz->algo) { case CMPH_FCH: - return brz_fch_search(brz, key, keylen); + return brz_fch_search(brz, key, keylen, fingerprint); case CMPH_BMZ8: - return brz_bmz8_search(brz, key, keylen); + return brz_bmz8_search(brz, key, keylen, fingerprint); default: assert(0); } return 0; @@ -716,6 +726,15 @@ void brz_destroy(cmph_t *mphf) */ cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) { + brz_data_t *brz = mphf->data; + switch(brz->algo) + { + case CMPH_FCH: + return brz_fch_search(brz, key, keylen, fingerprint); + case CMPH_BMZ8: + return brz_bmz8_search(brz, key, keylen, fingerprint); + default: assert(0); + } return 0; } @@ -726,6 +745,88 @@ cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke */ void brz_pack(cmph_t *mphf, void *packed_mphf) { + brz_data_t *data = (brz_data_t *)mphf->data; + cmph_uint8 * ptr = packed_mphf; + cmph_uint32 i,n; + + // packing internal algo type + memcpy(ptr, &(data->algo), sizeof(data->algo)); + ptr += sizeof(data->algo); + + // packing h0 type + CMPH_HASH h0_type = hash_get_type(data->h0); + memcpy(ptr, &h0_type, sizeof(h0_type)); + ptr += sizeof(h0_type); + + // packing h0 + hash_state_pack(data->h0, ptr); + ptr += hash_state_packed_size(h0_type); + + // packing k + memcpy(ptr, &(data->k), sizeof(data->k)); + ptr += sizeof(data->k); + + // packing c + *((cmph_uint32 *)ptr) = (cmph_uint32)data->c; + ptr += sizeof(data->c); + + // packing h1 type + CMPH_HASH h1_type = hash_get_type(data->h1[0]); + memcpy(ptr, &h1_type, sizeof(h1_type)); + ptr += sizeof(h1_type); + + // packing h2 type + CMPH_HASH h2_type = hash_get_type(data->h2[0]); + memcpy(ptr, &h2_type, sizeof(h2_type)); + ptr += sizeof(h2_type); + + // packing size + memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k); + ptr += data->k; + + // packing offset + memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k); + ptr += sizeof(cmph_uint32)*data->k; + + #if defined (__ia64) || defined (__x86_64__) + cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr; + #else + cmph_uint32 * g_is_ptr = ptr; + #endif + + cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k); + + for(i = 0; i < data->k; i++) + { + #if defined (__ia64) || defined (__x86_64__) + *g_is_ptr++ = (cmph_uint64)g_i; + #else + *g_is_ptr++ = (cmph_uint32)g_i; + #endif + // packing h1[i] + hash_state_pack(data->h1[i], g_i); + g_i += hash_state_packed_size(h1_type); + + // packing h2[i] + hash_state_pack(data->h2[i], g_i); + g_i += hash_state_packed_size(h2_type); + + // packing g_i + switch(data->algo) + { + case CMPH_FCH: + n = fch_calc_b(data->c, data->size[i]); + break; + case CMPH_BMZ8: + n = ceil(data->c * data->size[i]); + break; + default: assert(0); + } + memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n); + g_i += n; + + } + } /** \fn cmph_uint32 brz_packed_size(cmph_t *mphf); @@ -735,7 +836,149 @@ void brz_pack(cmph_t *mphf, void *packed_mphf) */ cmph_uint32 brz_packed_size(cmph_t *mphf) { - return 0; + cmph_uint32 i; + cmph_uint32 size = 0; + brz_data_t *data = (brz_data_t *)mphf->data; + CMPH_HASH h0_type = hash_get_type(data->h0); + CMPH_HASH h1_type = hash_get_type(data->h1[0]); + CMPH_HASH h2_type = hash_get_type(data->h2[0]); + size = (2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) + + sizeof(cmph_float32) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k); + // pointers to g_is + #if defined (__ia64) || defined (__x86_64__) + size += sizeof(cmph_uint64)*data->k; + #else + size += sizeof(cmph_uint32)*data->k; + #endif + + size += hash_state_packed_size(h1_type) * data->k; + size += hash_state_packed_size(h2_type) * data->k; + + cmph_uint32 n = 0; + for(i = 0; i < data->k; i++) + { + switch(data->algo) + { + case CMPH_FCH: + n = fch_calc_b(data->c, data->size[i]); + break; + case CMPH_BMZ8: + n = ceil(data->c * data->size[i]); + break; + default: assert(0); + } + size += n; + } + return size; +} + + + +static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) +{ + register CMPH_HASH h0_type = *packed_mphf++; + register cmph_uint32 *h0_ptr = packed_mphf; + packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); + + register cmph_uint32 k = *packed_mphf++; + + register cmph_float32 c = (cmph_float32)(*packed_mphf); + packed_mphf++; + + register CMPH_HASH h1_type = *packed_mphf++; + + register CMPH_HASH h2_type = *packed_mphf++; + + register cmph_uint8 * size = (cmph_uint8 *) packed_mphf; + packed_mphf = (cmph_uint32 *)(size + k); + + register cmph_uint32 * offset = packed_mphf; + packed_mphf += k; + + register cmph_uint32 h0; + + hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint); + h0 = fingerprint[2] % k; + + register cmph_uint32 m = size[h0]; + register cmph_uint32 n = ceil(c * m); + + #if defined (__ia64) || defined (__x86_64__) + register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf; + #else + register cmph_uint32 * g_is_ptr = packed_mphf; + #endif + + register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0]; + + register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type); + + register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type); + + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; + + register cmph_uint8 mphf_bucket; + + if (h1 == h2 && ++h2 >= n) h2 = 0; + mphf_bucket = g[h1] + g[h2]; + DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0); + DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, g[h1], g[h2], >offset[h0], m); + DEBUGP("Address: %u\n", mphf_bucket + offset[h0]); + return (mphf_bucket + offset[h0]); +} + +static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) +{ + register CMPH_HASH h0_type = *packed_mphf++; + + register cmph_uint32 *h0_ptr = packed_mphf; + packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type)); + + register cmph_uint32 k = *packed_mphf++; + + register cmph_float32 c = (cmph_float32)(*packed_mphf); + packed_mphf++; + + register CMPH_HASH h1_type = *packed_mphf++; + + register CMPH_HASH h2_type = *packed_mphf++; + + register cmph_uint8 * size = (cmph_uint8 *) packed_mphf; + packed_mphf = (cmph_uint32 *)(size + k); + + register cmph_uint32 * offset = packed_mphf; + packed_mphf += k; + + register cmph_uint32 h0; + + hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint); + h0 = fingerprint[2] % k; + + register cmph_uint32 m = size[h0]; + register cmph_uint32 b = fch_calc_b(c, m); + register cmph_float32 p1 = fch_calc_p1(m); + register cmph_float32 p2 = fch_calc_p2(b); + + #if defined (__ia64) || defined (__x86_64__) + register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf; + #else + register cmph_uint32 * g_is_ptr = packed_mphf; + #endif + + register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0]; + + register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type); + + register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type); + + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m; + + register cmph_uint8 mphf_bucket = 0; + h1 = mixh10h11h12(b, p1, p2, h1); + mphf_bucket = (h2 + g[h1]) % m; + return (mphf_bucket + offset[h0]); } /** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); @@ -747,6 +990,16 @@ cmph_uint32 brz_packed_size(cmph_t *mphf) */ cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - return 0; + register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf; + register CMPH_ALGO algo = *ptr++; + cmph_uint32 fingerprint[3]; + switch(algo) + { + case CMPH_FCH: + return brz_fch_search_packed(ptr, key, keylen, fingerprint); + case CMPH_BMZ8: + return brz_bmz8_search_packed(ptr, key, keylen, fingerprint); + default: assert(0); + } } diff --git a/src/chm.c b/src/chm.c index 0ed591f..6b140f5 100644 --- a/src/chm.c +++ b/src/chm.c @@ -331,22 +331,33 @@ cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke void chm_pack(cmph_t *mphf, void *packed_mphf) { chm_data_t *data = (chm_data_t *)mphf->data; - cmph_uint32 * ptr = packed_mphf; - + cmph_uint8 * ptr = packed_mphf; + + // packing h1 type + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + *((cmph_uint32 *) ptr) = h1_type; + ptr += sizeof(cmph_uint32); + // packing h1 hash_state_pack(data->hashes[0], ptr); - - ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4); - + ptr += hash_state_packed_size(h1_type); + + // packing h2 type + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + *((cmph_uint32 *) ptr) = h2_type; + ptr += sizeof(cmph_uint32); + // packing h2 hash_state_pack(data->hashes[1], ptr); - ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4); + ptr += hash_state_packed_size(h2_type); // packing n - *ptr++ = data->n; - + *((cmph_uint32 *) ptr) = data->n; + ptr += sizeof(data->n); + // packing m - *ptr++ = data->m; + *((cmph_uint32 *) ptr) = data->m; + ptr += sizeof(data->m); // packing g memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n); @@ -360,7 +371,11 @@ void chm_pack(cmph_t *mphf, void *packed_mphf) cmph_uint32 chm_packed_size(cmph_t *mphf) { chm_data_t *data = (chm_data_t *)mphf->data; - return (sizeof(CMPH_ALGO) + 2*hash_state_packed_size(data->hashes[0]) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); + CMPH_HASH h1_type = hash_get_type(data->hashes[0]); + CMPH_HASH h2_type = hash_get_type(data->hashes[1]); + + return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + 4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n); } /** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen); @@ -372,22 +387,21 @@ cmph_uint32 chm_packed_size(cmph_t *mphf) */ cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf; - register cmph_uint32 h1_size = *h1_ptr; + register cmph_uint8 *h1_ptr = packed_mphf; + register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr); + h1_ptr += 4; -// fprintf(stderr, "h1_size:%u\n", h1_size); - - register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4 - register cmph_uint32 h2_size = *h2_ptr; -// fprintf(stderr, "h2_size:%u\n", h2_size); + register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); + register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); + h2_ptr += 4; - register cmph_uint32 *g_ptr = h2_ptr + (h2_size >> 2); // h2_ptr + h2_size/4 + register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); register cmph_uint32 n = *g_ptr++; register cmph_uint32 m = *g_ptr++; - register cmph_uint32 h1 = hash_packed(h1_ptr, key, keylen) % n; - register cmph_uint32 h2 = hash_packed(h2_ptr, key, keylen) % n; + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n; DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); if (h1 == h2 && ++h2 >= n) h2 = 0; DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m); diff --git a/src/cmph.c b/src/cmph.c index 7084765..4768760 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -304,34 +304,37 @@ void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_ void cmph_config_destroy(cmph_config_t *mph) { - DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]); - switch (mph->algo) + if(mph) { - case CMPH_CHM: - chm_config_destroy(mph); - break; - case CMPH_BMZ: /* included -- Fabiano */ - bmz_config_destroy(mph); - break; - case CMPH_BMZ8: /* included -- Fabiano */ - bmz8_config_destroy(mph); - break; - case CMPH_BRZ: /* included -- Fabiano */ - brz_config_destroy(mph); - break; - case CMPH_FCH: /* included -- Fabiano */ - fch_config_destroy(mph); - break; - case CMPH_BDZ: /* included -- Fabiano */ - bdz_config_destroy(mph); - break; - case CMPH_BDZ_PH: /* included -- Fabiano */ - bdz_ph_config_destroy(mph); - break; - default: - assert(0); + DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]); + switch (mph->algo) + { + case CMPH_CHM: + chm_config_destroy(mph); + break; + case CMPH_BMZ: /* included -- Fabiano */ + bmz_config_destroy(mph); + break; + case CMPH_BMZ8: /* included -- Fabiano */ + bmz8_config_destroy(mph); + break; + case CMPH_BRZ: /* included -- Fabiano */ + brz_config_destroy(mph); + break; + case CMPH_FCH: /* included -- Fabiano */ + fch_config_destroy(mph); + break; + case CMPH_BDZ: /* included -- Fabiano */ + bdz_config_destroy(mph); + break; + case CMPH_BDZ_PH: /* included -- Fabiano */ + bdz_ph_config_destroy(mph); + break; + default: + assert(0); + } + __config_destroy(mph); } - __config_destroy(mph); } void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity) diff --git a/src/cmph_types.h b/src/cmph_types.h index 73a9d03..2fdcb14 100644 --- a/src/cmph_types.h +++ b/src/cmph_types.h @@ -6,6 +6,28 @@ typedef unsigned short cmph_uint16; typedef unsigned int cmph_uint32; typedef float cmph_float32; +#if defined (__ia64) || defined (__x86_64__) + /** \typedef long cmph_int64; + * \brief 64-bit integer for a 64-bit achitecture. + */ + typedef long cmph_int64; + + /** \typedef unsigned long cmph_uint64; + * \brief Unsigned 64-bit integer for a 64-bit achitecture. + */ + typedef unsigned long cmph_uint64; +#else + /** \typedef long long cmph_int64; + * \brief 64-bit integer for a 32-bit achitecture. + */ + typedef long long cmph_int64; + + /** \typedef unsigned long long cmph_uint64; + * \brief Unsigned 64-bit integer for a 32-bit achitecture. + */ + typedef unsigned long long cmph_uint64; +#endif + typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH; extern const char *cmph_hash_names[]; typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH, diff --git a/src/fch.c b/src/fch.c index b3a19ce..6e8aaef 100644 --- a/src/fch.c +++ b/src/fch.c @@ -425,7 +425,17 @@ void fch_destroy(cmph_t *mphf) */ cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) { - return 0; + register fch_data_t *fch = mphf->data; + + hash_vector(fch->h1, key, keylen, fingerprint); + register cmph_uint32 h1 = fingerprint[2] % fch->m; + + hash_vector(fch->h2, key, keylen, fingerprint); + register cmph_uint32 h2 = fingerprint[2] % fch->m; + + h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1); + //DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, h1, h2, fch->g[h1]); + return (h2 + fch->g[h1]) % fch->m; } /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf); @@ -435,6 +445,45 @@ cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke */ void fch_pack(cmph_t *mphf, void *packed_mphf) { + fch_data_t *data = (fch_data_t *)mphf->data; + cmph_uint8 * ptr = packed_mphf; + + // packing h1 type + CMPH_HASH h1_type = hash_get_type(data->h1); + *((cmph_uint32 *) ptr) = h1_type; + ptr += sizeof(cmph_uint32); + + // packing h1 + hash_state_pack(data->h1, ptr); + ptr += hash_state_packed_size(h1_type); + + // packing h2 type + CMPH_HASH h2_type = hash_get_type(data->h2); + *((cmph_uint32 *) ptr) = h2_type; + ptr += sizeof(cmph_uint32); + + // packing h2 + hash_state_pack(data->h2, ptr); + ptr += hash_state_packed_size(h2_type); + + // packing m + *((cmph_uint32 *) ptr) = data->m; + ptr += sizeof(data->m); + + // packing b + *((cmph_uint32 *) ptr) = data->b; + ptr += sizeof(data->b); + + // packing p1 + *((cmph_uint32 *)ptr) = (cmph_uint32)data->p1; + ptr += sizeof(data->p1); + + // packing p2 + *((cmph_uint32 *)ptr) = (cmph_uint32)data->p2; + ptr += sizeof(data->p2); + + // packing g + memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b)); } /** \fn cmph_uint32 fch_packed_size(cmph_t *mphf); @@ -444,7 +493,12 @@ void fch_pack(cmph_t *mphf, void *packed_mphf) */ cmph_uint32 fch_packed_size(cmph_t *mphf) { - return 0; + fch_data_t *data = (fch_data_t *)mphf->data; + CMPH_HASH h1_type = hash_get_type(data->h1); + CMPH_HASH h2_type = hash_get_type(data->h2); + + return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) + + 4*sizeof(cmph_uint32) + 2*sizeof(cmph_float32) + sizeof(cmph_uint32)*(data->b)); } @@ -457,6 +511,31 @@ cmph_uint32 fch_packed_size(cmph_t *mphf) */ cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen) { - return 0; + register cmph_uint8 *h1_ptr = packed_mphf; + register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr); + h1_ptr += 4; + + register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type); + register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr); + h2_ptr += 4; + + register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type)); + + register cmph_uint32 m = *g_ptr++; + + register cmph_uint32 b = *g_ptr++; + + register cmph_float32 p1 = (cmph_float32)(*g_ptr); + g_ptr++; + + register cmph_float32 p2 = (cmph_float32)(*g_ptr); + g_ptr++; + + register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m; + register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m; + + h1 = mixh10h11h12 (b, p1, p2, h1); + return (h2 + g_ptr[h1]) % m; + } diff --git a/src/hash.c b/src/hash.c index 60f630f..53bbba2 100644 --- a/src/hash.c +++ b/src/hash.c @@ -124,28 +124,21 @@ void hash_state_destroy(hash_state_t *state) return; } -/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed); +/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed) * \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. * \param state points to the hash function - * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size() + * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size() + * + * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. + * However, the hash function type must be packed outside. */ void hash_state_pack(hash_state_t *state, void *hash_packed) { - cmph_uint32 * ptr = (cmph_uint32 *)hash_packed; - cmph_uint32 * ptr_size = ptr++; - - // Reserve space for the hash function size - *ptr_size = 0; - - // Pack the hash function type - *ptr++ = state->hashfunc; - switch (state->hashfunc) { case CMPH_HASH_JENKINS: // pack the jenkins hash function - jenkins_state_pack((jenkins_state_t *)state, ptr); - *ptr_size = sizeof(cmph_uint32) + sizeof(CMPH_HASH) + jenkins_state_packed_size(); + jenkins_state_pack((jenkins_state_t *)state, hash_packed); break; default: assert(0); @@ -153,15 +146,15 @@ void hash_state_pack(hash_state_t *state, void *hash_packed) return; } -/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state); +/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) * \brief Return the amount of space needed to pack a hash function. - * \param state points to a hash function + * \param hashfunc function type * \return the size of the packed function or zero for failures */ -cmph_uint32 hash_state_packed_size(hash_state_t *state) +cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) { - cmph_uint32 size = sizeof(cmph_uint32) + sizeof(CMPH_HASH); - switch (state->hashfunc) + cmph_uint32 size = 0; + switch (hashfunc) { case CMPH_HASH_JENKINS: size += jenkins_state_packed_size(); @@ -172,23 +165,19 @@ cmph_uint32 hash_state_packed_size(hash_state_t *state) return size; } - -/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen) +/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen) * \param hash_packed is a pointer to a contiguous memory area + * \param hashfunc is the type of the hash function packed in hash_packed * \param key is a pointer to a key * \param keylen is the key length * \return an integer that represents a hash value of 32 bits. */ -cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen) +cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen) { - register cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1); - - register CMPH_HASH hashfunc = *ptr++; - switch (hashfunc) { case CMPH_HASH_JENKINS: - return jenkins_hash_packed(ptr, k, keylen); + return jenkins_hash_packed(hash_packed, k, keylen); default: assert(0); } @@ -196,23 +185,30 @@ cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen) return 0; } -/** \fn hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); +/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) * \param hash_packed is a pointer to a contiguous memory area * \param key is a pointer to a key * \param keylen is the key length * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. */ -void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) -{ - cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1); - - CMPH_HASH hashfunc = *ptr++; +void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes) +{ switch (hashfunc) { case CMPH_HASH_JENKINS: - jenkins_hash_vector_packed(ptr, k, keylen, hashes); + jenkins_hash_vector_packed(hash_packed, k, keylen, hashes); break; default: assert(0); } } + + +/** \fn CMPH_HASH hash_get_type(hash_state_t *state); + * \param state is a pointer to a hash_state_t structure + * \return the hash function type pointed by state + */ +CMPH_HASH hash_get_type(hash_state_t *state) +{ + return state->hashfunc; +} diff --git a/src/hash.h b/src/hash.h index 62711a5..0ec4ce1 100644 --- a/src/hash.h +++ b/src/hash.h @@ -34,32 +34,43 @@ void hash_state_destroy(hash_state_t *state); /** \fn void hash_state_pack(hash_state_t *state, void *hash_packed); * \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. * \param state points to the hash function - * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size() + * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size() + * + * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. + * However, the hash function type must be packed outside. */ void hash_state_pack(hash_state_t *state, void *hash_packed); -/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state); - * \brief Return the amount of space needed to pack a hash function. - * \param state points to a hash function - * \return the size of the packed function or zero for failures - */ -cmph_uint32 hash_state_packed_size(hash_state_t *state); - - -/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen); +/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen); * \param hash_packed is a pointer to a contiguous memory area + * \param hashfunc is the type of the hash function packed in hash_packed * \param key is a pointer to a key * \param keylen is the key length * \return an integer that represents a hash value of 32 bits. */ -cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen); +cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen); -/** \fn hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); +/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) + * \brief Return the amount of space needed to pack a hash function. + * \param hashfunc function type + * \return the size of the packed function or zero for failures + */ +cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc); + + +/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); * \param hash_packed is a pointer to a contiguous memory area * \param key is a pointer to a key * \param keylen is the key length * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. */ -void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); +void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); + + +/** \fn CMPH_HASH hash_get_type(hash_state_t *state); + * \param state is a pointer to a hash_state_t structure + * \return the hash function type pointed by state + */ +CMPH_HASH hash_get_type(hash_state_t *state); #endif diff --git a/src/main.c b/src/main.c index fbf3806..fa42370 100644 --- a/src/main.c +++ b/src/main.c @@ -241,6 +241,7 @@ int main(int argc, char **argv) if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15; if (c != 0) cmph_config_set_graphsize(config, c); mphf = cmph_new(config); + cmph_config_destroy(config); if (mphf == NULL) {