*** empty log message ***
This commit is contained in:
parent
d63806a90a
commit
ca2f228840
51
src/bdz.c
51
src/bdz.c
|
@ -634,31 +634,34 @@ cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
|
|||
/*
 * Serialises the BDZ function held in mphf into the caller-supplied buffer
 * packed_mphf.
 *
 * Layout written, in order:
 *   hash type of hl (one cmph_uint32), packed hl state, r, ranktablesize,
 *   the ranktable (ranktablesize words), b (one byte), then the g array
 *   ((n >> 2) + 1 bytes).
 *
 * The cursor is a byte pointer so that every advance is expressed in bytes;
 * the caller must supply a buffer of at least bdz_packed_size(mphf) bytes.
 */
void bdz_pack(cmph_t *mphf, void *packed_mphf)
{
	bdz_data_t *data = (bdz_data_t *)mphf->data;
	cmph_uint8 * ptr = packed_mphf;

	// packing hl type
	CMPH_HASH hl_type = hash_get_type(data->hl);
	*((cmph_uint32 *) ptr) = hl_type;
	ptr += sizeof(cmph_uint32);

	// packing hl
	hash_state_pack(data->hl, ptr);
	ptr += hash_state_packed_size(hl_type);

	// packing r
	*((cmph_uint32 *) ptr) = data->r;
	ptr += sizeof(data->r);

	// packing ranktablesize
	*((cmph_uint32 *) ptr) = data->ranktablesize;
	ptr += sizeof(data->ranktablesize);

	// packing ranktable
	memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
	ptr += sizeof(cmph_uint32)*(data->ranktablesize);

	// packing b
	*ptr++ = data->b;

	// packing g
	memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n >> 2) +1));
}
|
||||
|
||||
/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
|
||||
|
@ -669,7 +672,10 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf)
|
|||
/*
 * Returns the number of bytes needed to hold the packed form of mphf.
 *
 * The three cmph_uint32 words are the hash type, r and ranktablesize that
 * bdz_pack writes; they are followed by the ranktable, the single byte b and
 * the g array. sizeof(CMPH_ALGO) is included on top of what bdz_pack itself
 * writes -- presumably for an algorithm tag stored by the caller (not
 * visible here; confirm against the generic packer).
 */
cmph_uint32 bdz_packed_size(cmph_t *mphf)
{
	bdz_data_t *data = (bdz_data_t *)mphf->data;
	CMPH_HASH hl_type = hash_get_type(data->hl);

	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) +
			sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) +
			sizeof(cmph_uint8)*((data->n >> 2) +1));
}
|
||||
|
||||
/** \fn cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
@ -681,21 +687,20 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf)
|
|||
*/
|
||||
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
register cmph_uint32 vertex;
|
||||
register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint32 hl_size = *hl_ptr;
|
||||
register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4
|
||||
|
||||
register cmph_uint32 r = *ptr++;
|
||||
register cmph_uint32 ranktablesize = *ptr++;
|
||||
register cmph_uint32 *ranktable = ptr;
|
||||
ptr += ranktablesize;
|
||||
|
||||
register cmph_uint8 * g = (cmph_uint8 *)ptr;
|
||||
register cmph_uint32 vertex;
|
||||
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf + 4);
|
||||
|
||||
register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
|
||||
|
||||
register cmph_uint32 r = *ranktable++;
|
||||
register cmph_uint32 ranktablesize = *ranktable++;
|
||||
register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
|
||||
register cmph_uint8 b = *g++;
|
||||
|
||||
cmph_uint32 hl[3];
|
||||
hash_vector_packed(hl_ptr, key, keylen, hl);
|
||||
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
|
||||
hl[0] = hl[0] % r;
|
||||
hl[1] = hl[1] % r + r;
|
||||
hl[2] = hl[2] % r + (r << 1);
|
||||
|
|
31
src/bdz_ph.c
31
src/bdz_ph.c
|
@ -569,16 +569,20 @@ cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32
|
|||
void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||
cmph_uint32 * ptr = packed_mphf;
|
||||
cmph_uint8 * ptr = packed_mphf;
|
||||
|
||||
// packing hl type
|
||||
CMPH_HASH hl_type = hash_get_type(data->hl);
|
||||
*((cmph_uint32 *) ptr) = hl_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing hl
|
||||
hash_state_pack(data->hl, ptr);
|
||||
|
||||
|
||||
ptr += (hash_state_packed_size(data->hl) >> 2); // (hash_state_packed_size(data->hl) / 4);
|
||||
ptr += hash_state_packed_size(hl_type);
|
||||
|
||||
// packing r
|
||||
*ptr++ = data->r;
|
||||
*((cmph_uint32 *) ptr) = data->r;
|
||||
ptr += sizeof(data->r);
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n/5)+1));
|
||||
|
@ -592,7 +596,9 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
|
|||
/*
 * Returns the number of bytes needed to hold the packed form of mphf.
 *
 * The two cmph_uint32 words are the hash type and r written by bdz_ph_pack;
 * the g array occupies (n/5)+1 bytes. sizeof(CMPH_ALGO) is added on top of
 * that -- presumably for an algorithm tag stored by the caller (not visible
 * here; confirm against the generic packer).
 */
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
{
	bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
	CMPH_HASH hl_type = hash_get_type(data->hl);

	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) +
			sizeof(cmph_uint8)*((data->n/5)+1));
}
|
||||
|
||||
/** \fn cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
@ -605,18 +611,19 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
|
|||
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
|
||||
register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint32 hl_size = *hl_ptr;
|
||||
register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4
|
||||
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf + 4);
|
||||
|
||||
register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type);
|
||||
|
||||
register cmph_uint32 r = *ptr++;
|
||||
register cmph_uint8 * g = (cmph_uint8 *)ptr;
|
||||
register cmph_uint32 r = *((cmph_uint32*) ptr);
|
||||
register cmph_uint8 * g = ptr + 4;
|
||||
|
||||
cmph_uint32 hl[3];
|
||||
register cmph_uint8 byte0, byte1, byte2;
|
||||
register cmph_uint32 vertex;
|
||||
|
||||
hash_vector_packed(hl_ptr, key, keylen, hl);
|
||||
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
|
||||
|
||||
hl[0] = hl[0] % r;
|
||||
hl[1] = hl[1] % r + r;
|
||||
|
|
57
src/bmz.c
57
src/bmz.c
|
@ -575,23 +575,34 @@ cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
|
|||
*/
|
||||
/*
 * Serialises the BMZ function held in mphf into the caller-supplied buffer
 * packed_mphf.
 *
 * Layout written, in order:
 *   hash type of h1 (one cmph_uint32), packed h1 state,
 *   hash type of h2 (one cmph_uint32), packed h2 state,
 *   n, then the g array (n words).
 *
 * The cursor is a byte pointer, so every advance is a byte count. The caller
 * must supply a buffer of at least bmz_packed_size(mphf) bytes.
 */
void bmz_pack(cmph_t *mphf, void *packed_mphf)
{
	bmz_data_t *data = (bmz_data_t *)mphf->data;
	cmph_uint8 * ptr = packed_mphf;

	// packing h1 type
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	*((cmph_uint32 *) ptr) = h1_type;
	ptr += sizeof(cmph_uint32);

	// packing h1
	hash_state_pack(data->hashes[0], ptr);
	ptr += hash_state_packed_size(h1_type);

	// packing h2 type
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
	*((cmph_uint32 *) ptr) = h2_type;
	ptr += sizeof(cmph_uint32);

	// packing h2
	hash_state_pack(data->hashes[1], ptr);
	ptr += hash_state_packed_size(h2_type);

	// packing n
	*((cmph_uint32 *) ptr) = data->n;
	ptr += sizeof(data->n);

	// packing g
	memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
}
|
||||
|
||||
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||
|
@ -602,7 +613,11 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf)
|
|||
/*
 * Returns the number of bytes needed to hold the packed form of mphf.
 *
 * The three cmph_uint32 words are the two hash types and n written by
 * bmz_pack; the g array adds n more words. sizeof(CMPH_ALGO) is added on top
 * of that -- presumably for an algorithm tag stored by the caller (not
 * visible here; confirm against the generic packer).
 */
cmph_uint32 bmz_packed_size(cmph_t *mphf)
{
	bmz_data_t *data = (bmz_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);

	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
			3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
}
|
||||
|
||||
/** \fn cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
@ -614,20 +629,22 @@ cmph_uint32 bmz_packed_size(cmph_t *mphf)
|
|||
*/
|
||||
/*
 * Evaluates a packed BMZ function on key without unpacking it.
 *
 * packed_mphf must point at the layout produced by bmz_pack: h1 type word,
 * h1 state, h2 type word, h2 state, n, g[n]. Returns g[h1] + g[h2].
 */
cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	register cmph_uint8 *h1_ptr = packed_mphf;
	register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
	h1_ptr += 4;

	register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
	h2_ptr += 4;

	register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));

	register cmph_uint32 n = *g_ptr++;

	register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);

	// Valid indices are 0..n-1, so the bumped h2 must wrap as soon as it
	// reaches n; testing "> n" would let h2 == n through and read past g.
	if (h1 == h2 && ++h2 >= n) h2 = 0;

	// No edge count is stored in the packed BMZ layout, so none is logged.
	DEBUGP("key: %s g[h1]: %u g[h2]: %u\n", key, g_ptr[h1], g_ptr[h2]);
	return (g_ptr[h1] + g_ptr[h2]);
}
|
||||
|
|
52
src/bmz8.c
52
src/bmz8.c
|
@ -586,22 +586,30 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
|
|||
{
|
||||
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
|
||||
cmph_uint8 * ptr = packed_mphf;
|
||||
|
||||
|
||||
// packing h1 type
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
*((cmph_uint32 *) ptr) = h1_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing h1
|
||||
hash_state_pack(data->hashes[0], ptr);
|
||||
|
||||
ptr += hash_state_packed_size(data->hashes[0]);
|
||||
|
||||
ptr += hash_state_packed_size(h1_type);
|
||||
|
||||
// packing h2 type
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
*((cmph_uint32 *) ptr) = h2_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing h2
|
||||
hash_state_pack(data->hashes[1], ptr);
|
||||
ptr += hash_state_packed_size(data->hashes[1]);
|
||||
ptr += hash_state_packed_size(h2_type);
|
||||
|
||||
// packing n
|
||||
*ptr++ = data->n;
|
||||
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
|
||||
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||
|
@ -612,7 +620,11 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
|
|||
/*
 * Returns the number of bytes needed to hold the packed form of mphf.
 *
 * The two cmph_uint32 words are the hash types written by bmz8_pack; n is
 * stored as a single byte and the g array adds n more bytes.
 * sizeof(CMPH_ALGO) is added on top of that -- presumably for an algorithm
 * tag stored by the caller (not visible here; confirm against the generic
 * packer).
 */
cmph_uint32 bmz8_packed_size(cmph_t *mphf)
{
	bmz8_data_t *data = (bmz8_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);

	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
			2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
}
|
||||
|
||||
/** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
@ -624,20 +636,22 @@ cmph_uint32 bmz8_packed_size(cmph_t *mphf)
|
|||
*/
|
||||
/*
 * Evaluates a packed BMZ8 function on key without unpacking it.
 *
 * packed_mphf must point at the layout produced by bmz8_pack: h1 type word,
 * h1 state, h2 type word, h2 state, n (one byte), g[n] (bytes).
 * Returns g[h1] + g[h2], reduced to 8 bits by the return type.
 */
cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	register cmph_uint8 *h1_ptr = packed_mphf;
	register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
	h1_ptr += 4;

	register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
	h2_ptr += 4;

	register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);

	register cmph_uint8 n = *g_ptr++;

	register cmph_uint8 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	register cmph_uint8 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);

	// Valid indices are 0..n-1, so the bumped h2 must wrap as soon as it
	// reaches n; testing "> n" would let h2 == n through and read past g.
	if (h1 == h2 && ++h2 >= n) h2 = 0;

	// No edge count is stored in the packed BMZ8 layout, so none is logged.
	DEBUGP("key: %s g[h1]: %u g[h2]: %u\n", key, g_ptr[h1], g_ptr[h2]);
	return (cmph_uint8)(g_ptr[h1] + g_ptr[h2]);
}
|
||||
|
|
293
src/brz.c
293
src/brz.c
|
@ -635,14 +635,19 @@ void brz_load(FILE *f, cmph_t *mphf)
|
|||
return;
|
||||
}
|
||||
|
||||
static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen)
|
||||
static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||
{
|
||||
cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||
cmph_uint32 m = brz->size[h0];
|
||||
cmph_uint32 n = ceil(brz->c * m);
|
||||
cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
|
||||
cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
|
||||
cmph_uint8 mphf_bucket;
|
||||
register cmph_uint32 h0;
|
||||
|
||||
hash_vector(brz->h0, key, keylen, fingerprint);
|
||||
h0 = fingerprint[2] % brz->k;
|
||||
|
||||
register cmph_uint32 m = brz->size[h0];
|
||||
register cmph_uint32 n = ceil(brz->c * m);
|
||||
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
|
||||
register cmph_uint8 mphf_bucket;
|
||||
|
||||
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||
mphf_bucket = brz->g[h0][h1] + brz->g[h0][h2];
|
||||
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
|
||||
|
@ -651,16 +656,20 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32
|
|||
return (mphf_bucket + brz->offset[h0]);
|
||||
}
|
||||
|
||||
static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen)
|
||||
static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||
{
|
||||
cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||
cmph_uint32 m = brz->size[h0];
|
||||
cmph_uint32 b = fch_calc_b(brz->c, m);
|
||||
cmph_float32 p1 = fch_calc_p1(m);
|
||||
cmph_float32 p2 = fch_calc_p2(b);
|
||||
cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m;
|
||||
cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m;
|
||||
cmph_uint8 mphf_bucket = 0;
|
||||
register cmph_uint32 h0;
|
||||
|
||||
hash_vector(brz->h0, key, keylen, fingerprint);
|
||||
h0 = fingerprint[2] % brz->k;
|
||||
|
||||
register cmph_uint32 m = brz->size[h0];
|
||||
register cmph_uint32 b = fch_calc_b(brz->c, m);
|
||||
register cmph_float32 p1 = fch_calc_p1(m);
|
||||
register cmph_float32 p2 = fch_calc_p2(b);
|
||||
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m;
|
||||
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m;
|
||||
register cmph_uint8 mphf_bucket = 0;
|
||||
h1 = mixh10h11h12(b, p1, p2, h1);
|
||||
mphf_bucket = (h2 + brz->g[h0][h1]) % m;
|
||||
return (mphf_bucket + brz->offset[h0]);
|
||||
|
@ -669,12 +678,13 @@ static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32
|
|||
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
brz_data_t *brz = mphf->data;
|
||||
cmph_uint32 fingerprint[3];
|
||||
switch(brz->algo)
|
||||
{
|
||||
case CMPH_FCH:
|
||||
return brz_fch_search(brz, key, keylen);
|
||||
return brz_fch_search(brz, key, keylen, fingerprint);
|
||||
case CMPH_BMZ8:
|
||||
return brz_bmz8_search(brz, key, keylen);
|
||||
return brz_bmz8_search(brz, key, keylen, fingerprint);
|
||||
default: assert(0);
|
||||
}
|
||||
return 0;
|
||||
|
@ -716,6 +726,15 @@ void brz_destroy(cmph_t *mphf)
|
|||
*/
|
||||
/*
 * Looks key up in a BRZ function and leaves the h0 hash vector in
 * fingerprint (three words, filled in by the per-algorithm helper).
 *
 * Dispatches on the bucket algorithm recorded in the BRZ data; any other
 * algorithm trips the assertion and, with assertions disabled, yields 0.
 */
cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	brz_data_t *state = mphf->data;

	if (state->algo == CMPH_FCH)
	{
		return brz_fch_search(state, key, keylen, fingerprint);
	}
	if (state->algo == CMPH_BMZ8)
	{
		return brz_bmz8_search(state, key, keylen, fingerprint);
	}

	assert(0);
	return 0;
}
|
||||
|
||||
|
@ -726,6 +745,88 @@ cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
|
|||
*/
|
||||
/*
 * Serialises the BRZ function held in mphf into the caller-supplied buffer
 * packed_mphf.
 *
 * Layout written, in order:
 *   internal (per-bucket) algorithm, h0 type, packed h0 state, k, c,
 *   h1 type, h2 type, size[k] (bytes), offset[k] (words),
 *   a table of k addresses (one per bucket), and then for every bucket i:
 *   packed h1[i] state, packed h2[i] state, g[i].
 *
 * The address table stores the absolute address of each bucket's data
 * inside packed_mphf, so the packed image is only usable at the address it
 * was packed at.
 *
 * The caller must supply a buffer of at least brz_packed_size(mphf) bytes.
 */
void brz_pack(cmph_t *mphf, void *packed_mphf)
{
	brz_data_t *data = (brz_data_t *)mphf->data;
	cmph_uint8 * ptr = packed_mphf;
	cmph_uint32 i,n;

	// packing internal algo type
	memcpy(ptr, &(data->algo), sizeof(data->algo));
	ptr += sizeof(data->algo);

	// packing h0 type
	CMPH_HASH h0_type = hash_get_type(data->h0);
	memcpy(ptr, &h0_type, sizeof(h0_type));
	ptr += sizeof(h0_type);

	// packing h0
	hash_state_pack(data->h0, ptr);
	ptr += hash_state_packed_size(h0_type);

	// packing k
	memcpy(ptr, &(data->k), sizeof(data->k));
	ptr += sizeof(data->k);

	// packing c
	// NOTE(review): c is stored as its integer conversion, so any fractional
	// part is dropped. The packed readers convert the stored integer back to
	// a float, so this form is kept for compatibility with them; changing it
	// requires changing the readers in the same step.
	*((cmph_uint32 *)ptr) = (cmph_uint32)data->c;
	ptr += sizeof(data->c);

	// packing h1 type
	CMPH_HASH h1_type = hash_get_type(data->h1[0]);
	memcpy(ptr, &h1_type, sizeof(h1_type));
	ptr += sizeof(h1_type);

	// packing h2 type
	CMPH_HASH h2_type = hash_get_type(data->h2[0]);
	memcpy(ptr, &h2_type, sizeof(h2_type));
	ptr += sizeof(h2_type);

	// packing size
	memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
	ptr += data->k;

	// packing offset
	memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
	ptr += sizeof(cmph_uint32)*data->k;

	// Table of per-bucket addresses; the element width follows the pointer
	// width selected by the architecture macros below.
	#if defined (__ia64) || defined (__x86_64__)
		cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr;
	#else
		cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr;
	#endif

	// Bucket data starts right after the k-entry address table.
	cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k);

	for(i = 0; i < data->k; i++)
	{
		#if defined (__ia64) || defined (__x86_64__)
			*g_is_ptr++ = (cmph_uint64)g_i;
		#else
			*g_is_ptr++ = (cmph_uint32)g_i;
		#endif

		// packing h1[i]
		hash_state_pack(data->h1[i], g_i);
		g_i += hash_state_packed_size(h1_type);

		// packing h2[i]
		hash_state_pack(data->h2[i], g_i);
		g_i += hash_state_packed_size(h2_type);

		// packing g_i
		switch(data->algo)
		{
			case CMPH_FCH:
				n = fch_calc_b(data->c, data->size[i]);
				break;
			case CMPH_BMZ8:
				n = ceil(data->c * data->size[i]);
				break;
			default:
				assert(0);
				n = 0; // keeps n defined when assertions are compiled out
				break;
		}
		memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
		g_i += n;
	}
}
|
||||
|
||||
/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
|
||||
|
@ -735,7 +836,149 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
|
|||
*/
|
||||
/*
 * Returns the number of bytes needed to hold the packed form of mphf.
 *
 * Fixed part: two CMPH_ALGO values, three CMPH_HASH values, the packed h0
 * state, k (cmph_uint32), c (cmph_float32), size[k] bytes and offset[k]
 * words. Then one address slot per bucket (its width follows the same
 * architecture macros brz_pack uses), the packed h1/h2 state of every
 * bucket, and every bucket's g array.
 */
cmph_uint32 brz_packed_size(cmph_t *mphf)
{
	cmph_uint32 i;
	cmph_uint32 size = 0;
	brz_data_t *data = (brz_data_t *)mphf->data;
	CMPH_HASH h0_type = hash_get_type(data->h0);
	CMPH_HASH h1_type = hash_get_type(data->h1[0]);
	CMPH_HASH h2_type = hash_get_type(data->h2[0]);

	size = (2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
			sizeof(cmph_float32) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);

	// pointers to g_is
	#if defined (__ia64) || defined (__x86_64__)
		size += sizeof(cmph_uint64)*data->k;
	#else
		size += sizeof(cmph_uint32)*data->k;
	#endif

	size += hash_state_packed_size(h1_type) * data->k;
	size += hash_state_packed_size(h2_type) * data->k;

	for(i = 0; i < data->k; i++)
	{
		// Length of bucket i's g array; stays 0 for an unsupported algorithm
		// when assertions are compiled out.
		cmph_uint32 n = 0;
		switch(data->algo)
		{
			case CMPH_FCH:
				n = fch_calc_b(data->c, data->size[i]);
				break;
			case CMPH_BMZ8:
				n = ceil(data->c * data->size[i]);
				break;
			default: assert(0);
		}
		size += n;
	}
	return size;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Evaluates a packed BRZ function whose buckets use BMZ8.
 *
 * packed_mphf points just past the internal-algorithm word (the caller has
 * already consumed it) and is walked in this order: h0 type, h0 state, k, c,
 * h1 type, h2 type, size[k] (bytes), offset[k] (words), then the table of
 * per-bucket addresses. fingerprint receives the three-word h0 hash vector;
 * its third word selects the bucket.
 *
 * Returns the bucket-local value plus the bucket's offset.
 */
static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	register CMPH_HASH h0_type = *packed_mphf++;
	register cmph_uint32 *h0_ptr = packed_mphf;
	packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));

	register cmph_uint32 k = *packed_mphf++;

	// c is read back as the integer that was stored and converted to float.
	register cmph_float32 c = (cmph_float32)(*packed_mphf);
	packed_mphf++;

	register CMPH_HASH h1_type = *packed_mphf++;

	register CMPH_HASH h2_type = *packed_mphf++;

	register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
	packed_mphf = (cmph_uint32 *)(size + k);

	register cmph_uint32 * offset = packed_mphf;
	packed_mphf += k;

	register cmph_uint32 h0;

	hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
	h0 = fingerprint[2] % k;

	register cmph_uint32 m = size[h0];
	register cmph_uint32 n = ceil(c * m);

	#if defined (__ia64) || defined (__x86_64__)
		register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf;
	#else
		register cmph_uint32 * g_is_ptr = packed_mphf;
	#endif

	// The table entry is the stored address of bucket h0's data:
	// h1 state, then h2 state, then the bucket's g array.
	register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];

	register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);

	register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);

	register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;

	register cmph_uint8 mphf_bucket;

	if (h1 == h2 && ++h2 >= n) h2 = 0;
	mphf_bucket = g[h1] + g[h2];
	DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
	DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, g[h1], g[h2], offset[h0], m);
	DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
	return (mphf_bucket + offset[h0]);
}
|
||||
|
||||
/*
 * Evaluates a packed BRZ function whose buckets use FCH.
 *
 * packed_mphf points just past the internal-algorithm word and is read in
 * this order: h0 type, h0 state, k, c, h1 type, h2 type, size[k] (bytes),
 * offset[k] (words), then the table of per-bucket addresses. fingerprint
 * receives the three-word h0 hash vector; its third word picks the bucket.
 *
 * Returns the bucket-local value plus the bucket's offset.
 */
static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	cmph_uint32 *cursor = packed_mphf;

	/* Header: h0 type followed by the packed h0 state. */
	CMPH_HASH h0_type = *cursor;
	cursor++;
	cmph_uint32 *h0_state = cursor;
	cursor = (cmph_uint32 *)((cmph_uint8 *)cursor + hash_state_packed_size(h0_type));

	/* Scalar parameters. */
	cmph_uint32 k = *cursor;
	cursor++;
	cmph_float32 c = (cmph_float32)(*cursor);
	cursor++;
	CMPH_HASH h1_type = *cursor;
	cursor++;
	CMPH_HASH h2_type = *cursor;
	cursor++;

	/* Per-bucket tables: sizes (bytes) and offsets (words). */
	cmph_uint8 *bucket_sizes = (cmph_uint8 *)cursor;
	cursor = (cmph_uint32 *)(bucket_sizes + k);
	cmph_uint32 *bucket_offsets = cursor;
	cursor += k;

	/* Select the bucket from the third word of the h0 hash vector. */
	hash_vector_packed(h0_state, h0_type, key, keylen, fingerprint);
	cmph_uint32 bucket = fingerprint[2] % k;

	cmph_uint32 m = bucket_sizes[bucket];
	cmph_uint32 b = fch_calc_b(c, m);
	cmph_float32 p1 = fch_calc_p1(m);
	cmph_float32 p2 = fch_calc_p2(b);

	/* Address table: one stored address per bucket. */
	#if defined (__ia64) || defined (__x86_64__)
		cmph_uint64 *bucket_addrs = (cmph_uint64 *)cursor;
	#else
		cmph_uint32 *bucket_addrs = cursor;
	#endif

	/* Bucket data: h1 state, h2 state, then the g array. */
	cmph_uint8 *h1_state = (cmph_uint8 *) bucket_addrs[bucket];
	cmph_uint8 *h2_state = h1_state + hash_state_packed_size(h1_type);
	cmph_uint8 *g = h2_state + hash_state_packed_size(h2_type);

	cmph_uint32 h1 = hash_packed(h1_state, h1_type, key, keylen) % m;
	cmph_uint32 h2 = hash_packed(h2_state, h2_type, key, keylen) % m;

	h1 = mixh10h11h12(b, p1, p2, h1);

	cmph_uint8 local_index = (h2 + g[h1]) % m;
	return (local_index + bucket_offsets[bucket]);
}
|
||||
|
||||
/** \fn cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
@ -747,6 +990,16 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
|
|||
*/
|
||||
/*
 * Evaluates a packed BRZ function on key without unpacking it.
 *
 * The first word of packed_mphf is the per-bucket algorithm; the rest of the
 * buffer is handed to the matching helper. Any other algorithm trips the
 * assertion and, with assertions compiled out, yields 0 (the same fallback
 * brz_search uses).
 */
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
	register CMPH_ALGO algo = *ptr++;
	cmph_uint32 fingerprint[3];

	switch(algo)
	{
		case CMPH_FCH:
			return brz_fch_search_packed(ptr, key, keylen, fingerprint);
		case CMPH_BMZ8:
			return brz_bmz8_search_packed(ptr, key, keylen, fingerprint);
		default: assert(0);
	}
	return 0;
}
|
||||
|
||||
|
|
54
src/chm.c
54
src/chm.c
|
@ -331,22 +331,33 @@ cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
|
|||
void chm_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||
cmph_uint32 * ptr = packed_mphf;
|
||||
|
||||
cmph_uint8 * ptr = packed_mphf;
|
||||
|
||||
// packing h1 type
|
||||
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
|
||||
*((cmph_uint32 *) ptr) = h1_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing h1
|
||||
hash_state_pack(data->hashes[0], ptr);
|
||||
|
||||
ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4);
|
||||
|
||||
ptr += hash_state_packed_size(h1_type);
|
||||
|
||||
// packing h2 type
|
||||
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
|
||||
*((cmph_uint32 *) ptr) = h2_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing h2
|
||||
hash_state_pack(data->hashes[1], ptr);
|
||||
ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4);
|
||||
ptr += hash_state_packed_size(h2_type);
|
||||
|
||||
// packing n
|
||||
*ptr++ = data->n;
|
||||
|
||||
*((cmph_uint32 *) ptr) = data->n;
|
||||
ptr += sizeof(data->n);
|
||||
|
||||
// packing m
|
||||
*ptr++ = data->m;
|
||||
*((cmph_uint32 *) ptr) = data->m;
|
||||
ptr += sizeof(data->m);
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||
|
@ -360,7 +371,11 @@ void chm_pack(cmph_t *mphf, void *packed_mphf)
|
|||
/*
 * Returns the number of bytes needed to hold the packed form of mphf.
 *
 * The four cmph_uint32 words are the two hash types, n and m written by
 * chm_pack; the g array adds n more words. sizeof(CMPH_ALGO) is added on top
 * of that -- presumably for an algorithm tag stored by the caller (not
 * visible here; confirm against the generic packer).
 */
cmph_uint32 chm_packed_size(cmph_t *mphf)
{
	chm_data_t *data = (chm_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);

	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
			4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
}
|
||||
|
||||
/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||
|
@ -372,22 +387,21 @@ cmph_uint32 chm_packed_size(cmph_t *mphf)
|
|||
*/
|
||||
cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf;
|
||||
register cmph_uint32 h1_size = *h1_ptr;
|
||||
register cmph_uint8 *h1_ptr = packed_mphf;
|
||||
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
|
||||
h1_ptr += 4;
|
||||
|
||||
// fprintf(stderr, "h1_size:%u\n", h1_size);
|
||||
|
||||
register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4
|
||||
register cmph_uint32 h2_size = *h2_ptr;
|
||||
// fprintf(stderr, "h2_size:%u\n", h2_size);
|
||||
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||
h2_ptr += 4;
|
||||
|
||||
register cmph_uint32 *g_ptr = h2_ptr + (h2_size >> 2); // h2_ptr + h2_size/4
|
||||
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||
|
||||
register cmph_uint32 n = *g_ptr++;
|
||||
register cmph_uint32 m = *g_ptr++;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, key, keylen) % n;
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
|
||||
|
|
55
src/cmph.c
55
src/cmph.c
|
@ -304,34 +304,37 @@ void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_
|
|||
|
||||
/*
 * Releases a configuration object: first the algorithm-specific part chosen
 * by mph->algo, then the generic part via __config_destroy.
 *
 * Passing a null pointer is a no-op. Nothing may touch mph before the null
 * check, including the debug trace.
 */
void cmph_config_destroy(cmph_config_t *mph)
{
	if (!mph)
	{
		return;
	}

	DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
	switch (mph->algo)
	{
		case CMPH_CHM:
			chm_config_destroy(mph);
			break;
		case CMPH_BMZ: /* included -- Fabiano */
			bmz_config_destroy(mph);
			break;
		case CMPH_BMZ8: /* included -- Fabiano */
			bmz8_config_destroy(mph);
			break;
		case CMPH_BRZ: /* included -- Fabiano */
			brz_config_destroy(mph);
			break;
		case CMPH_FCH: /* included -- Fabiano */
			fch_config_destroy(mph);
			break;
		case CMPH_BDZ: /* included -- Fabiano */
			bdz_config_destroy(mph);
			break;
		case CMPH_BDZ_PH: /* included -- Fabiano */
			bdz_ph_config_destroy(mph);
			break;
		default:
			assert(0);
	}
	__config_destroy(mph);
}
|
||||
|
||||
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity)
|
||||
|
|
|
@ -6,6 +6,28 @@ typedef unsigned short cmph_uint16;
|
|||
typedef unsigned int cmph_uint32;
|
||||
typedef float cmph_float32;
|
||||
|
||||
#if defined (__ia64) || defined (__x86_64__)
|
||||
/** \typedef long cmph_int64;
|
||||
* \brief 64-bit integer for a 64-bit architecture.
|
||||
*/
|
||||
typedef long cmph_int64;
|
||||
|
||||
/** \typedef unsigned long cmph_uint64;
|
||||
* \brief Unsigned 64-bit integer for a 64-bit architecture.
|
||||
*/
|
||||
typedef unsigned long cmph_uint64;
|
||||
#else
|
||||
/** \typedef long long cmph_int64;
|
||||
* \brief 64-bit integer for a 32-bit architecture.
|
||||
*/
|
||||
typedef long long cmph_int64;
|
||||
|
||||
/** \typedef unsigned long long cmph_uint64;
|
||||
* \brief Unsigned 64-bit integer for a 32-bit architecture.
|
||||
*/
|
||||
typedef unsigned long long cmph_uint64;
|
||||
#endif
|
||||
|
||||
typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH;
|
||||
extern const char *cmph_hash_names[];
|
||||
typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH,
|
||||
|
|
85
src/fch.c
85
src/fch.c
|
@ -425,7 +425,17 @@ void fch_destroy(cmph_t *mphf)
|
|||
*/
|
||||
cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||
{
|
||||
return 0;
|
||||
register fch_data_t *fch = mphf->data;
|
||||
|
||||
hash_vector(fch->h1, key, keylen, fingerprint);
|
||||
register cmph_uint32 h1 = fingerprint[2] % fch->m;
|
||||
|
||||
hash_vector(fch->h2, key, keylen, fingerprint);
|
||||
register cmph_uint32 h2 = fingerprint[2] % fch->m;
|
||||
|
||||
h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
|
||||
//DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, h1, h2, fch->g[h1]);
|
||||
return (h2 + fch->g[h1]) % fch->m;
|
||||
}
|
||||
|
||||
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
|
||||
|
@ -435,6 +445,45 @@ cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
|
|||
*/
|
||||
void fch_pack(cmph_t *mphf, void *packed_mphf)
|
||||
{
|
||||
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||
cmph_uint8 * ptr = packed_mphf;
|
||||
|
||||
// packing h1 type
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1);
|
||||
*((cmph_uint32 *) ptr) = h1_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing h1
|
||||
hash_state_pack(data->h1, ptr);
|
||||
ptr += hash_state_packed_size(h1_type);
|
||||
|
||||
// packing h2 type
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2);
|
||||
*((cmph_uint32 *) ptr) = h2_type;
|
||||
ptr += sizeof(cmph_uint32);
|
||||
|
||||
// packing h2
|
||||
hash_state_pack(data->h2, ptr);
|
||||
ptr += hash_state_packed_size(h2_type);
|
||||
|
||||
// packing m
|
||||
*((cmph_uint32 *) ptr) = data->m;
|
||||
ptr += sizeof(data->m);
|
||||
|
||||
// packing b
|
||||
*((cmph_uint32 *) ptr) = data->b;
|
||||
ptr += sizeof(data->b);
|
||||
|
||||
// packing p1
|
||||
*((cmph_uint32 *)ptr) = (cmph_uint32)data->p1;
|
||||
ptr += sizeof(data->p1);
|
||||
|
||||
// packing p2
|
||||
*((cmph_uint32 *)ptr) = (cmph_uint32)data->p2;
|
||||
ptr += sizeof(data->p2);
|
||||
|
||||
// packing g
|
||||
memcpy(ptr, data->g, sizeof(cmph_uint32)*(data->b));
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
|
||||
|
@ -444,7 +493,12 @@ void fch_pack(cmph_t *mphf, void *packed_mphf)
|
|||
*/
|
||||
cmph_uint32 fch_packed_size(cmph_t *mphf)
|
||||
{
|
||||
return 0;
|
||||
fch_data_t *data = (fch_data_t *)mphf->data;
|
||||
CMPH_HASH h1_type = hash_get_type(data->h1);
|
||||
CMPH_HASH h2_type = hash_get_type(data->h2);
|
||||
|
||||
return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
|
||||
4*sizeof(cmph_uint32) + 2*sizeof(cmph_float32) + sizeof(cmph_uint32)*(data->b));
|
||||
}
|
||||
|
||||
|
||||
|
@ -457,6 +511,31 @@ cmph_uint32 fch_packed_size(cmph_t *mphf)
|
|||
*/
|
||||
cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||
{
|
||||
return 0;
|
||||
register cmph_uint8 *h1_ptr = packed_mphf;
|
||||
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
|
||||
h1_ptr += 4;
|
||||
|
||||
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
|
||||
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
|
||||
h2_ptr += 4;
|
||||
|
||||
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
|
||||
|
||||
register cmph_uint32 m = *g_ptr++;
|
||||
|
||||
register cmph_uint32 b = *g_ptr++;
|
||||
|
||||
register cmph_float32 p1 = (cmph_float32)(*g_ptr);
|
||||
g_ptr++;
|
||||
|
||||
register cmph_float32 p2 = (cmph_float32)(*g_ptr);
|
||||
g_ptr++;
|
||||
|
||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
|
||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;
|
||||
|
||||
h1 = mixh10h11h12 (b, p1, p2, h1);
|
||||
return (h2 + g_ptr[h1]) % m;
|
||||
|
||||
}
|
||||
|
||||
|
|
62
src/hash.c
62
src/hash.c
|
@ -124,28 +124,21 @@ void hash_state_destroy(hash_state_t *state)
|
|||
return;
|
||||
}
|
||||
|
||||
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
|
||||
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed)
|
||||
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||
* \param state points to the hash function
|
||||
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||
*
|
||||
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||
* However, the hash function type must be packed outside.
|
||||
*/
|
||||
void hash_state_pack(hash_state_t *state, void *hash_packed)
|
||||
{
|
||||
cmph_uint32 * ptr = (cmph_uint32 *)hash_packed;
|
||||
cmph_uint32 * ptr_size = ptr++;
|
||||
|
||||
// Reserve space for the hash function size
|
||||
*ptr_size = 0;
|
||||
|
||||
// Pack the hash function type
|
||||
*ptr++ = state->hashfunc;
|
||||
|
||||
switch (state->hashfunc)
|
||||
{
|
||||
case CMPH_HASH_JENKINS:
|
||||
// pack the jenkins hash function
|
||||
jenkins_state_pack((jenkins_state_t *)state, ptr);
|
||||
*ptr_size = sizeof(cmph_uint32) + sizeof(CMPH_HASH) + jenkins_state_packed_size();
|
||||
jenkins_state_pack((jenkins_state_t *)state, hash_packed);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
@ -153,15 +146,15 @@ void hash_state_pack(hash_state_t *state, void *hash_packed)
|
|||
return;
|
||||
}
|
||||
|
||||
/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state);
|
||||
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||
* \brief Return the amount of space needed to pack a hash function.
|
||||
* \param state points to a hash function
|
||||
* \param hashfunc function type
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
cmph_uint32 hash_state_packed_size(hash_state_t *state)
|
||||
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||
{
|
||||
cmph_uint32 size = sizeof(cmph_uint32) + sizeof(CMPH_HASH);
|
||||
switch (state->hashfunc)
|
||||
cmph_uint32 size = 0;
|
||||
switch (hashfunc)
|
||||
{
|
||||
case CMPH_HASH_JENKINS:
|
||||
size += jenkins_state_packed_size();
|
||||
|
@ -172,23 +165,19 @@ cmph_uint32 hash_state_packed_size(hash_state_t *state)
|
|||
return size;
|
||||
}
|
||||
|
||||
|
||||
/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
|
||||
/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
|
||||
* \param hash_packed is a pointer to a contiguous memory area
|
||||
* \param hashfunc is the type of the hash function packed in hash_packed
|
||||
* \param key is a pointer to a key
|
||||
* \param keylen is the key length
|
||||
* \return an integer that represents a hash value of 32 bits.
|
||||
*/
|
||||
cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
|
||||
cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
|
||||
{
|
||||
register cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1);
|
||||
|
||||
register CMPH_HASH hashfunc = *ptr++;
|
||||
|
||||
switch (hashfunc)
|
||||
{
|
||||
case CMPH_HASH_JENKINS:
|
||||
return jenkins_hash_packed(ptr, k, keylen);
|
||||
return jenkins_hash_packed(hash_packed, k, keylen);
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
@ -196,23 +185,30 @@ cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** \fn hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||
/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||
* \param hash_packed is a pointer to a contiguous memory area
|
||||
* \param key is a pointer to a key
|
||||
* \param keylen is the key length
|
||||
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||
*/
|
||||
void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||
{
|
||||
cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1);
|
||||
|
||||
CMPH_HASH hashfunc = *ptr++;
|
||||
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||
{
|
||||
switch (hashfunc)
|
||||
{
|
||||
case CMPH_HASH_JENKINS:
|
||||
jenkins_hash_vector_packed(ptr, k, keylen, hashes);
|
||||
jenkins_hash_vector_packed(hash_packed, k, keylen, hashes);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
|
||||
* \param state is a pointer to a hash_state_t structure
|
||||
* \return the hash function type pointed by state
|
||||
*/
|
||||
CMPH_HASH hash_get_type(hash_state_t *state)
|
||||
{
|
||||
return state->hashfunc;
|
||||
}
|
||||
|
|
37
src/hash.h
37
src/hash.h
|
@ -34,32 +34,43 @@ void hash_state_destroy(hash_state_t *state);
|
|||
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
|
||||
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||
* \param state points to the hash function
|
||||
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||
*
|
||||
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||
* However, the hash function type must be packed outside.
|
||||
*/
|
||||
void hash_state_pack(hash_state_t *state, void *hash_packed);
|
||||
|
||||
/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state);
|
||||
* \brief Return the amount of space needed to pack a hash function.
|
||||
* \param state points to a hash function
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
cmph_uint32 hash_state_packed_size(hash_state_t *state);
|
||||
|
||||
|
||||
/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen);
|
||||
/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
|
||||
* \param hash_packed is a pointer to a contiguous memory area
|
||||
* \param hashfunc is the type of the hash function packed in hash_packed
|
||||
* \param key is a pointer to a key
|
||||
* \param keylen is the key length
|
||||
* \return an integer that represents a hash value of 32 bits.
|
||||
*/
|
||||
cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen);
|
||||
cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
|
||||
|
||||
/** \fn hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
|
||||
* \brief Return the amount of space needed to pack a hash function.
|
||||
* \param hashfunc function type
|
||||
* \return the size of the packed function or zero for failures
|
||||
*/
|
||||
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc);
|
||||
|
||||
|
||||
/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||
* \param hash_packed is a pointer to a contiguous memory area
|
||||
* \param key is a pointer to a key
|
||||
* \param keylen is the key length
|
||||
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||
*/
|
||||
void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||
|
||||
|
||||
/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
|
||||
* \param state is a pointer to a hash_state_t structure
|
||||
* \return the hash function type pointed by state
|
||||
*/
|
||||
CMPH_HASH hash_get_type(hash_state_t *state);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -241,6 +241,7 @@ int main(int argc, char **argv)
|
|||
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
|
||||
if (c != 0) cmph_config_set_graphsize(config, c);
|
||||
mphf = cmph_new(config);
|
||||
|
||||
cmph_config_destroy(config);
|
||||
if (mphf == NULL)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue