*** empty log message ***

This commit is contained in:
fc_botelho 2008-03-29 01:48:15 +00:00
parent c8a1c4fef9
commit b0c9cd5c4c
12 changed files with 611 additions and 189 deletions

View File

@ -634,31 +634,34 @@ cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
void bdz_pack(cmph_t *mphf, void *packed_mphf)
{
	/*
	 * Serialises the BDZ function into packed_mphf using a byte cursor.
	 * Layout written, in order:
	 *   hl type tag (cmph_uint32) | packed hl state | r | ranktablesize |
	 *   ranktable[ranktablesize] (cmph_uint32 each) | b (1 byte) |
	 *   g[(n >> 2) + 1] (1 byte each)
	 * The caller must supply a buffer of at least bdz_packed_size() bytes.
	 */
	bdz_data_t *data = (bdz_data_t *)mphf->data;
	cmph_uint8 * ptr = packed_mphf;

	// packing hl type
	CMPH_HASH hl_type = hash_get_type(data->hl);
	*((cmph_uint32 *) ptr) = hl_type;
	ptr += sizeof(cmph_uint32);

	// packing hl
	hash_state_pack(data->hl, ptr);
	ptr += hash_state_packed_size(hl_type);

	// packing r
	*((cmph_uint32 *) ptr) = data->r;
	ptr += sizeof(data->r);

	// packing ranktablesize
	*((cmph_uint32 *) ptr) = data->ranktablesize;
	ptr += sizeof(data->ranktablesize);

	// packing ranktable (ptr is a byte cursor, so advance by the byte count)
	memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
	ptr += sizeof(cmph_uint32)*(data->ranktablesize);

	// packing b
	*ptr++ = data->b;

	// packing g
	memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n >> 2) +1));
}
/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
@ -669,7 +672,10 @@ void bdz_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 bdz_packed_size(cmph_t *mphf)
{
	/*
	 * Number of bytes needed to pack this BDZ function.
	 * 3*sizeof(cmph_uint32) covers the hl type tag, r and ranktablesize;
	 * the remaining terms are the packed hl state, the rank table, b and g.
	 * sizeof(CMPH_ALGO) is presumably for an algorithm tag written by the
	 * caller ahead of this payload -- TODO confirm against the pack wrapper.
	 */
	bdz_data_t *data = (bdz_data_t *)mphf->data;
	CMPH_HASH hl_type = hash_get_type(data->hl);
	return (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*((data->n >> 2) +1));
}
/** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
@ -681,21 +687,20 @@ cmph_uint32 bdz_packed_size(cmph_t *mphf)
*/
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
register cmph_uint32 vertex;
register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf;
register cmph_uint32 hl_size = *hl_ptr;
register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf + 4);
register cmph_uint32 r = *ptr++;
register cmph_uint32 ranktablesize = *ptr++;
register cmph_uint32 *ranktable = ptr;
ptr += ranktablesize;
register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
register cmph_uint8 * g = (cmph_uint8 *)ptr;
register cmph_uint32 r = *ranktable++;
register cmph_uint32 ranktablesize = *ranktable++;
register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
register cmph_uint8 b = *g++;
cmph_uint32 hl[3];
hash_vector_packed(hl_ptr, key, keylen, hl);
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
hl[0] = hl[0] % r;
hl[1] = hl[1] % r + r;
hl[2] = hl[2] % r + (r << 1);

View File

@ -569,16 +569,20 @@ cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32
void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
{
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
cmph_uint32 * ptr = packed_mphf;
cmph_uint8 * ptr = packed_mphf;
// packing hl type
CMPH_HASH hl_type = hash_get_type(data->hl);
*((cmph_uint32 *) ptr) = hl_type;
ptr += sizeof(cmph_uint32);
// packing hl
hash_state_pack(data->hl, ptr);
ptr += (hash_state_packed_size(data->hl) >> 2); // (hash_state_packed_size(data->hl) / 4);
ptr += hash_state_packed_size(hl_type);
// packing r
*ptr++ = data->r;
*((cmph_uint32 *) ptr) = data->r;
ptr += sizeof(data->r);
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n/5)+1));
@ -592,7 +596,9 @@ void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
{
	/*
	 * Number of bytes needed to pack this BDZ_PH function.
	 * 2*sizeof(cmph_uint32) covers the hl type tag and r; the remaining terms
	 * are the packed hl state and the g array of (n/5)+1 bytes.
	 * sizeof(CMPH_ALGO) is presumably for an algorithm tag written by the
	 * caller ahead of this payload -- TODO confirm against the pack wrapper.
	 */
	bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
	CMPH_HASH hl_type = hash_get_type(data->hl);
	return (sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint8)*((data->n/5)+1));
}
/** cmph_uint32 bdz_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
@ -605,18 +611,19 @@ cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf;
register cmph_uint32 hl_size = *hl_ptr;
register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4
register CMPH_HASH hl_type = *(cmph_uint32 *)packed_mphf;
register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf + 4);
register cmph_uint32 r = *ptr++;
register cmph_uint8 * g = (cmph_uint8 *)ptr;
register cmph_uint8 * ptr = hl_ptr + hash_state_packed_size(hl_type);
register cmph_uint32 r = *((cmph_uint32*) ptr);
register cmph_uint8 * g = ptr + 4;
cmph_uint32 hl[3];
register cmph_uint8 byte0, byte1, byte2;
register cmph_uint32 vertex;
hash_vector_packed(hl_ptr, key, keylen, hl);
hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
hl[0] = hl[0] % r;
hl[1] = hl[1] % r + r;

View File

@ -575,20 +575,31 @@ cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
*/
void bmz_pack(cmph_t *mphf, void *packed_mphf)
{
bmz_data_t *data = (bmz_data_t *)mphf->data;
cmph_uint32 * ptr = packed_mphf;
cmph_uint8 * ptr = packed_mphf;
// packing h1 type
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
*((cmph_uint32 *) ptr) = h1_type;
ptr += sizeof(cmph_uint32);
// packing h1
hash_state_pack(data->hashes[0], ptr);
ptr += hash_state_packed_size(h1_type);
ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4);
// packing h2 type
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
*((cmph_uint32 *) ptr) = h2_type;
ptr += sizeof(cmph_uint32);
// packing h2
hash_state_pack(data->hashes[1], ptr);
ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4);
ptr += hash_state_packed_size(h2_type);
// packing n
*ptr++ = data->n;
*((cmph_uint32 *) ptr) = data->n;
ptr += sizeof(data->n);
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
@ -602,7 +613,11 @@ void bmz_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 bmz_packed_size(cmph_t *mphf)
{
	/*
	 * Number of bytes needed to pack this BMZ function.
	 * 3*sizeof(cmph_uint32) covers the h1 type tag, the h2 type tag and n;
	 * the remaining terms are the two packed hash states and g[n].
	 * sizeof(CMPH_ALGO) is presumably for an algorithm tag written by the
	 * caller ahead of this payload -- TODO confirm against the pack wrapper.
	 */
	bmz_data_t *data = (bmz_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
	return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
		3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
}
/** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
@ -614,20 +629,22 @@ cmph_uint32 bmz_packed_size(cmph_t *mphf)
*/
cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	/*
	 * Evaluates a packed BMZ function for key.
	 * Layout read: h1 type tag | packed h1 | h2 type tag | packed h2 | n | g[n].
	 * Returns g[h1] + g[h2].
	 */
	register cmph_uint8 *h1_ptr = packed_mphf;
	register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
	h1_ptr += 4; // skip the type tag; h1_ptr now addresses the packed h1 state

	register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
	h2_ptr += 4; // skip the type tag; h2_ptr now addresses the packed h2 state

	register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));

	register cmph_uint32 n = *g_ptr++; // g_ptr now addresses g[0]

	register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
	// On a collision move h2 to the next slot, wrapping at n. The comparison
	// must be >= so that h2 never becomes n, which is one past the end of g.
	if (h1 == h2 && ++h2 >= n) h2 = 0;
	DEBUGP("key: %s g[h1]: %u g[h2]: %u\n", key, g_ptr[h1], g_ptr[h2]);
	return (g_ptr[h1] + g_ptr[h2]);
}

View File

@ -587,21 +587,29 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
cmph_uint8 * ptr = packed_mphf;
// packing h1 type
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
*((cmph_uint32 *) ptr) = h1_type;
ptr += sizeof(cmph_uint32);
// packing h1
hash_state_pack(data->hashes[0], ptr);
ptr += hash_state_packed_size(h1_type);
ptr += hash_state_packed_size(data->hashes[0]);
// packing h2 type
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
*((cmph_uint32 *) ptr) = h2_type;
ptr += sizeof(cmph_uint32);
// packing h2
hash_state_pack(data->hashes[1], ptr);
ptr += hash_state_packed_size(data->hashes[1]);
ptr += hash_state_packed_size(h2_type);
// packing n
*ptr++ = data->n;
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
}
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
@ -612,7 +620,11 @@ void bmz8_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 bmz8_packed_size(cmph_t *mphf)
{
	/*
	 * Number of bytes needed to pack this BMZ8 function.
	 * 2*sizeof(cmph_uint32) covers the h1 and h2 type tags, sizeof(cmph_uint8)
	 * covers n, and the remaining terms are the two packed hash states and
	 * the g array of n bytes.
	 * sizeof(CMPH_ALGO) is presumably for an algorithm tag written by the
	 * caller ahead of this payload -- TODO confirm against the pack wrapper.
	 */
	bmz8_data_t *data = (bmz8_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
	return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
		2*sizeof(cmph_uint32) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
}
/** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
@ -624,20 +636,22 @@ cmph_uint32 bmz8_packed_size(cmph_t *mphf)
*/
cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	/*
	 * Evaluates a packed BMZ8 function for key.
	 * Layout read: h1 type tag | packed h1 | h2 type tag | packed h2 |
	 * n (1 byte) | g[n] (1 byte each).
	 * Returns g[h1] + g[h2] narrowed to cmph_uint8.
	 */
	register cmph_uint8 *h1_ptr = packed_mphf;
	register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
	h1_ptr += 4; // skip the type tag; h1_ptr now addresses the packed h1 state

	register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
	h2_ptr += 4; // skip the type tag; h2_ptr now addresses the packed h2 state

	register cmph_uint8 *g_ptr = h2_ptr + hash_state_packed_size(h2_type);

	register cmph_uint8 n = *g_ptr++; // g_ptr now addresses g[0]

	register cmph_uint8 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	register cmph_uint8 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
	DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
	// On a collision move h2 to the next slot, wrapping at n. The comparison
	// must be >= so that h2 never becomes n, which is one past the end of g.
	if (h1 == h2 && ++h2 >= n) h2 = 0;
	DEBUGP("key: %s g[h1]: %u g[h2]: %u\n", key, g_ptr[h1], g_ptr[h2]);
	return (g_ptr[h1] + g_ptr[h2]);
}

293
src/brz.c
View File

@ -635,14 +635,19 @@ void brz_load(FILE *f, cmph_t *mphf)
return;
}
static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen)
static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
cmph_uint32 m = brz->size[h0];
cmph_uint32 n = ceil(brz->c * m);
cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
cmph_uint8 mphf_bucket;
register cmph_uint32 h0;
hash_vector(brz->h0, key, keylen, fingerprint);
h0 = fingerprint[2] % brz->k;
register cmph_uint32 m = brz->size[h0];
register cmph_uint32 n = ceil(brz->c * m);
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % n;
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % n;
register cmph_uint8 mphf_bucket;
if (h1 == h2 && ++h2 >= n) h2 = 0;
mphf_bucket = brz->g[h0][h1] + brz->g[h0][h2];
DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
@ -651,16 +656,20 @@ static cmph_uint32 brz_bmz8_search(brz_data_t *brz, const char *key, cmph_uint32
return (mphf_bucket + brz->offset[h0]);
}
static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen)
static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
cmph_uint32 h0 = hash(brz->h0, key, keylen) % brz->k;
cmph_uint32 m = brz->size[h0];
cmph_uint32 b = fch_calc_b(brz->c, m);
cmph_float32 p1 = fch_calc_p1(m);
cmph_float32 p2 = fch_calc_p2(b);
cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m;
cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m;
cmph_uint8 mphf_bucket = 0;
register cmph_uint32 h0;
hash_vector(brz->h0, key, keylen, fingerprint);
h0 = fingerprint[2] % brz->k;
register cmph_uint32 m = brz->size[h0];
register cmph_uint32 b = fch_calc_b(brz->c, m);
register cmph_float32 p1 = fch_calc_p1(m);
register cmph_float32 p2 = fch_calc_p2(b);
register cmph_uint32 h1 = hash(brz->h1[h0], key, keylen) % m;
register cmph_uint32 h2 = hash(brz->h2[h0], key, keylen) % m;
register cmph_uint8 mphf_bucket = 0;
h1 = mixh10h11h12(b, p1, p2, h1);
mphf_bucket = (h2 + brz->g[h0][h1]) % m;
return (mphf_bucket + brz->offset[h0]);
@ -669,12 +678,13 @@ static cmph_uint32 brz_fch_search(brz_data_t *brz, const char *key, cmph_uint32
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
brz_data_t *brz = mphf->data;
cmph_uint32 fingerprint[3];
switch(brz->algo)
{
case CMPH_FCH:
return brz_fch_search(brz, key, keylen);
return brz_fch_search(brz, key, keylen, fingerprint);
case CMPH_BMZ8:
return brz_bmz8_search(brz, key, keylen);
return brz_bmz8_search(brz, key, keylen, fingerprint);
default: assert(0);
}
return 0;
@ -716,6 +726,15 @@ void brz_destroy(cmph_t *mphf)
*/
cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	/*
	 * Dispatches to the bucket-level search that matches the algorithm stored
	 * in the BRZ state, passing fingerprint through to it. Any other algorithm
	 * value trips the assertion; if assertions are disabled the result is 0.
	 */
	brz_data_t *state = mphf->data;

	if (state->algo == CMPH_FCH)
	{
		return brz_fch_search(state, key, keylen, fingerprint);
	}
	if (state->algo == CMPH_BMZ8)
	{
		return brz_bmz8_search(state, key, keylen, fingerprint);
	}

	assert(0);
	return 0;
}
@ -726,6 +745,88 @@ cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
*/
void brz_pack(cmph_t *mphf, void *packed_mphf)
{
	/*
	 * Serialises the BRZ function into packed_mphf using a byte cursor.
	 * Layout written, in order:
	 *   algo | h0 type | packed h0 | k | c | h1 type | h2 type |
	 *   size[k] (1 byte each) | offset[k] (cmph_uint32 each) |
	 *   k pointer-sized entries, one per bucket |
	 *   per bucket: packed h1[i] | packed h2[i] | g[i] (n bytes)
	 * Each per-bucket entry holds the absolute address of that bucket's data
	 * inside packed_mphf, so the packed image is only valid at the address it
	 * was packed to.
	 */
	brz_data_t *data = (brz_data_t *)mphf->data;
	cmph_uint8 * ptr = packed_mphf;
	cmph_uint32 i;
	// Initialised so that, if assertions are compiled out and the algorithm is
	// unknown, the per-bucket copy length is 0 rather than indeterminate.
	cmph_uint32 n = 0;

	// packing internal algo type
	memcpy(ptr, &(data->algo), sizeof(data->algo));
	ptr += sizeof(data->algo);

	// packing h0 type
	CMPH_HASH h0_type = hash_get_type(data->h0);
	memcpy(ptr, &h0_type, sizeof(h0_type));
	ptr += sizeof(h0_type);

	// packing h0
	hash_state_pack(data->h0, ptr);
	ptr += hash_state_packed_size(h0_type);

	// packing k
	memcpy(ptr, &(data->k), sizeof(data->k));
	ptr += sizeof(data->k);

	// packing c
	// NOTE(review): c is converted to an integer here, so any fractional part
	// is dropped, while the g copy length below uses the unconverted data->c.
	// The packed readers decode this same integer encoding, so changing it
	// requires updating them together -- confirm whether c can be fractional.
	*((cmph_uint32 *)ptr) = (cmph_uint32)data->c;
	ptr += sizeof(data->c);

	// packing h1 type
	CMPH_HASH h1_type = hash_get_type(data->h1[0]);
	memcpy(ptr, &h1_type, sizeof(h1_type));
	ptr += sizeof(h1_type);

	// packing h2 type
	CMPH_HASH h2_type = hash_get_type(data->h2[0]);
	memcpy(ptr, &h2_type, sizeof(h2_type));
	ptr += sizeof(h2_type);

	// packing size
	memcpy(ptr, data->size, sizeof(cmph_uint8)*data->k);
	ptr += data->k;

	// packing offset
	memcpy(ptr, data->offset, sizeof(cmph_uint32)*data->k);
	ptr += sizeof(cmph_uint32)*data->k;

	// table of per-bucket addresses, followed by the bucket data itself
	#if defined (__ia64) || defined (__x86_64__)
		cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr;
	#else
		cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr;
	#endif

	cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k);

	for(i = 0; i < data->k; i++)
	{
		// record where bucket i starts
		#if defined (__ia64) || defined (__x86_64__)
			*g_is_ptr++ = (cmph_uint64)g_i;
		#else
			*g_is_ptr++ = (cmph_uint32)g_i;
		#endif

		// packing h1[i]
		hash_state_pack(data->h1[i], g_i);
		g_i += hash_state_packed_size(h1_type);

		// packing h2[i]
		hash_state_pack(data->h2[i], g_i);
		g_i += hash_state_packed_size(h2_type);

		// packing g_i
		switch(data->algo)
		{
			case CMPH_FCH:
				n = fch_calc_b(data->c, data->size[i]);
				break;
			case CMPH_BMZ8:
				n = ceil(data->c * data->size[i]);
				break;
			default: assert(0);
		}
		memcpy(g_i, data->g[i], sizeof(cmph_uint8)*n);
		g_i += n;
	}
}
/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
@ -735,7 +836,149 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
*/
cmph_uint32 brz_packed_size(cmph_t *mphf)
{
	/*
	 * Number of bytes needed to pack this BRZ function: the fixed header,
	 * the size and offset tables, one pointer-sized entry per bucket, the
	 * packed h1/h2 state of every bucket and every bucket's g array.
	 */
	cmph_uint32 i;
	cmph_uint32 size = 0;
	brz_data_t *data = (brz_data_t *)mphf->data;
	CMPH_HASH h0_type = hash_get_type(data->h0);
	CMPH_HASH h1_type = hash_get_type(data->h1[0]);
	CMPH_HASH h2_type = hash_get_type(data->h2[0]);

	// fixed header, size[k] and offset[k]
	size = (2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
		sizeof(cmph_float32) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);

	// pointers to g_is
	#if defined (__ia64) || defined (__x86_64__)
		size += sizeof(cmph_uint64)*data->k;
	#else
		size += sizeof(cmph_uint32)*data->k;
	#endif

	// packed h1 and h2 state for each of the k buckets
	size += hash_state_packed_size(h1_type) * data->k;
	size += hash_state_packed_size(h2_type) * data->k;

	// g array of each bucket; its length depends on the bucket algorithm
	cmph_uint32 n = 0;
	for(i = 0; i < data->k; i++)
	{
		switch(data->algo)
		{
			case CMPH_FCH:
				n = fch_calc_b(data->c, data->size[i]);
				break;
			case CMPH_BMZ8:
				n = ceil(data->c * data->size[i]);
				break;
			default: assert(0);
		}
		size += n;
	}
	return size;
}
static cmph_uint32 brz_bmz8_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	/*
	 * Evaluates a packed BRZ function whose buckets use BMZ8.
	 * packed_mphf points at the h0 type tag. Layout read from there:
	 *   h0 type | packed h0 | k | c | h1 type | h2 type | size[k] |
	 *   offset[k] | k pointer-sized bucket addresses
	 * fingerprint receives the three values produced by hashing key with h0.
	 * Returns the bucket-local value plus offset[h0].
	 */
	register CMPH_HASH h0_type = *packed_mphf++;
	register cmph_uint32 *h0_ptr = packed_mphf;
	packed_mphf = (cmph_uint32 *)(((cmph_uint8 *)packed_mphf) + hash_state_packed_size(h0_type));

	register cmph_uint32 k = *packed_mphf++;

	// c is decoded as an integer value converted to float
	register cmph_float32 c = (cmph_float32)(*packed_mphf);
	packed_mphf++;

	register CMPH_HASH h1_type = *packed_mphf++;

	register CMPH_HASH h2_type = *packed_mphf++;

	register cmph_uint8 * size = (cmph_uint8 *) packed_mphf;
	packed_mphf = (cmph_uint32 *)(size + k);

	register cmph_uint32 * offset = packed_mphf;
	packed_mphf += k;

	// the third h0 value selects the bucket
	register cmph_uint32 h0;

	hash_vector_packed(h0_ptr, h0_type, key, keylen, fingerprint);
	h0 = fingerprint[2] % k;

	register cmph_uint32 m = size[h0];
	register cmph_uint32 n = ceil(c * m);

	#if defined (__ia64) || defined (__x86_64__)
		register cmph_uint64 * g_is_ptr = (cmph_uint64 *)packed_mphf;
	#else
		register cmph_uint32 * g_is_ptr = packed_mphf;
	#endif

	// bucket data: packed h1 | packed h2 | g
	register cmph_uint8 * h1_ptr = (cmph_uint8 *) g_is_ptr[h0];

	register cmph_uint8 * h2_ptr = h1_ptr + hash_state_packed_size(h1_type);

	register cmph_uint8 * g = h2_ptr + hash_state_packed_size(h2_type);

	register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
	register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;

	register cmph_uint8 mphf_bucket;

	// on a collision move h2 to the next slot, wrapping at n
	if (h1 == h2 && ++h2 >= n) h2 = 0;
	mphf_bucket = g[h1] + g[h2];
	DEBUGP("key: %s h1: %u h2: %u h0: %u\n", key, h1, h2, h0);
	DEBUGP("key: %s g[h1]: %u g[h2]: %u offset[h0]: %u edges: %u\n", key, g[h1], g[h2], offset[h0], m);
	DEBUGP("Address: %u\n", mphf_bucket + offset[h0]);
	return (mphf_bucket + offset[h0]);
}
static cmph_uint32 brz_fch_search_packed(cmph_uint32 *packed_mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	/*
	 * Evaluates a packed BRZ function whose buckets use FCH.
	 * packed_mphf points at the h0 type tag. Layout read from there:
	 *   h0 type | packed h0 | k | c | h1 type | h2 type | size[k] |
	 *   offset[k] | k pointer-sized bucket addresses
	 * fingerprint receives the three values produced by hashing key with h0.
	 * Returns the bucket-local value plus offset[h0].
	 */
	cmph_uint32 *cursor = packed_mphf;

	/* h0: type tag followed by its packed state */
	CMPH_HASH h0_type = *cursor;
	cmph_uint32 *h0_state = cursor + 1;
	cursor = (cmph_uint32 *)(((cmph_uint8 *)h0_state) + hash_state_packed_size(h0_type));

	/* four consecutive 32-bit header words: k, c, h1 type, h2 type */
	cmph_uint32 k = cursor[0];
	cmph_float32 c = (cmph_float32)(cursor[1]);
	CMPH_HASH h1_type = cursor[2];
	CMPH_HASH h2_type = cursor[3];
	cursor += 4;

	/* per-bucket key counts, then per-bucket offsets */
	cmph_uint8 *bucket_sizes = (cmph_uint8 *)cursor;
	cmph_uint32 *bucket_offsets = (cmph_uint32 *)(bucket_sizes + k);
	cursor = bucket_offsets + k;

	/* the third h0 value selects the bucket */
	hash_vector_packed(h0_state, h0_type, key, keylen, fingerprint);
	cmph_uint32 h0 = fingerprint[2] % k;

	cmph_uint32 m = bucket_sizes[h0];
	cmph_uint32 b = fch_calc_b(c, m);
	cmph_float32 p1 = fch_calc_p1(m);
	cmph_float32 p2 = fch_calc_p2(b);

	#if defined (__ia64) || defined (__x86_64__)
		cmph_uint64 *bucket_addrs = (cmph_uint64 *)cursor;
	#else
		cmph_uint32 *bucket_addrs = cursor;
	#endif

	/* bucket data: packed h1 | packed h2 | g */
	cmph_uint8 *h1_state = (cmph_uint8 *) bucket_addrs[h0];
	cmph_uint8 *h2_state = h1_state + hash_state_packed_size(h1_type);
	cmph_uint8 *g = h2_state + hash_state_packed_size(h2_type);

	cmph_uint32 h1 = hash_packed(h1_state, h1_type, key, keylen) % m;
	cmph_uint32 h2 = hash_packed(h2_state, h2_type, key, keylen) % m;
	h1 = mixh10h11h12(b, p1, p2, h1);

	/* the bucket-local result is narrowed to one byte before the offset is added */
	cmph_uint8 mphf_bucket = (cmph_uint8)((h2 + g[h1]) % m);
	return (mphf_bucket + bucket_offsets[h0]);
}
/** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
@ -747,6 +990,16 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
*/
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	/*
	 * Evaluates a packed BRZ function for key. The first 32-bit word of
	 * packed_mphf is the bucket algorithm; the rest is handed to the matching
	 * bucket-level search. The fingerprint is a scratch buffer and is discarded.
	 */
	register cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
	register CMPH_ALGO algo = *ptr++;
	cmph_uint32 fingerprint[3];
	switch(algo)
	{
		case CMPH_FCH:
			return brz_fch_search_packed(ptr, key, keylen, fingerprint);
		case CMPH_BMZ8:
			return brz_bmz8_search_packed(ptr, key, keylen, fingerprint);
		default: assert(0);
	}
	// Reached only when assertions are compiled out and algo is unknown;
	// return a defined value instead of falling off the end of the function.
	return 0;
}

View File

@ -331,22 +331,33 @@ cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
void chm_pack(cmph_t *mphf, void *packed_mphf)
{
chm_data_t *data = (chm_data_t *)mphf->data;
cmph_uint32 * ptr = packed_mphf;
cmph_uint8 * ptr = packed_mphf;
// packing h1 type
CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
*((cmph_uint32 *) ptr) = h1_type;
ptr += sizeof(cmph_uint32);
// packing h1
hash_state_pack(data->hashes[0], ptr);
ptr += hash_state_packed_size(h1_type);
ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4);
// packing h2 type
CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
*((cmph_uint32 *) ptr) = h2_type;
ptr += sizeof(cmph_uint32);
// packing h2
hash_state_pack(data->hashes[1], ptr);
ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4);
ptr += hash_state_packed_size(h2_type);
// packing n
*ptr++ = data->n;
*((cmph_uint32 *) ptr) = data->n;
ptr += sizeof(data->n);
// packing m
*ptr++ = data->m;
*((cmph_uint32 *) ptr) = data->m;
ptr += sizeof(data->m);
// packing g
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
@ -360,7 +371,11 @@ void chm_pack(cmph_t *mphf, void *packed_mphf)
cmph_uint32 chm_packed_size(cmph_t *mphf)
{
	/*
	 * Number of bytes needed to pack this CHM function.
	 * 4*sizeof(cmph_uint32) covers the h1 type tag, the h2 type tag, n and m;
	 * the remaining terms are the two packed hash states and g[n].
	 * sizeof(CMPH_ALGO) is presumably for an algorithm tag written by the
	 * caller ahead of this payload -- TODO confirm against the pack wrapper.
	 */
	chm_data_t *data = (chm_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->hashes[0]);
	CMPH_HASH h2_type = hash_get_type(data->hashes[1]);
	return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
		4*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
}
/** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
@ -372,22 +387,21 @@ cmph_uint32 chm_packed_size(cmph_t *mphf)
*/
cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf;
register cmph_uint32 h1_size = *h1_ptr;
register cmph_uint8 *h1_ptr = packed_mphf;
register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
h1_ptr += 4;
// fprintf(stderr, "h1_size:%u\n", h1_size);
register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
h2_ptr += 4;
register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4
register cmph_uint32 h2_size = *h2_ptr;
// fprintf(stderr, "h2_size:%u\n", h2_size);
register cmph_uint32 *g_ptr = h2_ptr + (h2_size >> 2); // h2_ptr + h2_size/4
register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));
register cmph_uint32 n = *g_ptr++;
register cmph_uint32 m = *g_ptr++;
register cmph_uint32 h1 = hash_packed(h1_ptr, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, key, keylen) % n;
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
if (h1 == h2 && ++h2 >= n) h2 = 0;
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);

View File

@ -304,6 +304,8 @@ void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_
void cmph_config_destroy(cmph_config_t *mph)
{
if(mph)
{
DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
switch (mph->algo)
{
@ -332,6 +334,7 @@ void cmph_config_destroy(cmph_config_t *mph)
assert(0);
}
__config_destroy(mph);
}
}
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity)

View File

@ -6,6 +6,28 @@ typedef unsigned short cmph_uint16;
typedef unsigned int cmph_uint32;
typedef float cmph_float32;
/* NOTE(review): the first branch assumes `long` is 64 bits wide whenever
 * __ia64 or __x86_64__ is defined -- confirm for every supported toolchain. */
#if defined (__ia64) || defined (__x86_64__)
/** \typedef long cmph_int64;
* \brief 64-bit integer for a 64-bit architecture.
*/
typedef long cmph_int64;
/** \typedef unsigned long cmph_uint64;
* \brief Unsigned 64-bit integer for a 64-bit architecture.
*/
typedef unsigned long cmph_uint64;
#else
/** \typedef long long cmph_int64;
* \brief 64-bit integer for a 32-bit architecture.
*/
typedef long long cmph_int64;
/** \typedef unsigned long long cmph_uint64;
* \brief Unsigned 64-bit integer for a 32-bit architecture.
*/
typedef unsigned long long cmph_uint64;
#endif
typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH;
extern const char *cmph_hash_names[];
typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH,

View File

@ -425,7 +425,17 @@ void fch_destroy(cmph_t *mphf)
*/
cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
{
	/*
	 * Evaluates the FCH function for key and fills fingerprint.
	 * fingerprint is written twice, first by h1 and then by h2, so on return
	 * it holds the three values produced by h2.
	 * Returns (h2 + g[h1]) % m.
	 */
	register fch_data_t *fch = mphf->data;
	hash_vector(fch->h1, key, keylen, fingerprint);
	register cmph_uint32 h1 = fingerprint[2] % fch->m;

	hash_vector(fch->h2, key, keylen, fingerprint);
	register cmph_uint32 h2 = fingerprint[2] % fch->m;

	h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1);
	//DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, h1, h2, fch->g[h1]);
	return (h2 + fch->g[h1]) % fch->m;
}
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
@ -435,6 +445,45 @@ cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 ke
*/
void fch_pack(cmph_t *mphf, void *packed_mphf)
{
	/*
	 * Serialises the FCH function into packed_mphf using a byte cursor.
	 * Layout written, in order:
	 *   h1 type tag | packed h1 | h2 type tag | packed h2 |
	 *   m | b | p1 | p2 | g[b] (cmph_uint32 each)
	 * p1 and p2 are stored after conversion to cmph_uint32.
	 */
	fch_data_t *fch = (fch_data_t *)mphf->data;
	cmph_uint8 *out = packed_mphf;

	/* first hash function: type tag, then its packed state */
	CMPH_HASH h1_type = hash_get_type(fch->h1);
	*(cmph_uint32 *)out = h1_type;
	out += sizeof(cmph_uint32);
	hash_state_pack(fch->h1, out);
	out += hash_state_packed_size(h1_type);

	/* second hash function: type tag, then its packed state */
	CMPH_HASH h2_type = hash_get_type(fch->h2);
	*(cmph_uint32 *)out = h2_type;
	out += sizeof(cmph_uint32);
	hash_state_pack(fch->h2, out);
	out += hash_state_packed_size(h2_type);

	/* scalar parameters m and b */
	*(cmph_uint32 *)out = fch->m;
	out += sizeof(fch->m);
	*(cmph_uint32 *)out = fch->b;
	out += sizeof(fch->b);

	/* p1 and p2, each converted to an integer word */
	*(cmph_uint32 *)out = (cmph_uint32)fch->p1;
	out += sizeof(fch->p1);
	*(cmph_uint32 *)out = (cmph_uint32)fch->p2;
	out += sizeof(fch->p2);

	/* displacement table g */
	memcpy(out, fch->g, sizeof(cmph_uint32)*(fch->b));
}
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
@ -444,7 +493,12 @@ void fch_pack(cmph_t *mphf, void *packed_mphf)
*/
cmph_uint32 fch_packed_size(cmph_t *mphf)
{
	/*
	 * Sums the sizes of the two packed hash states, 4 cmph_uint32 words,
	 * 2 cmph_float32 words and the g array of b cmph_uint32 entries, plus
	 * sizeof(CMPH_ALGO).
	 * NOTE(review): sizeof(CMPH_ALGO) is presumably for an algorithm tag
	 * written by the caller ahead of this payload -- confirm against the
	 * pack wrapper.
	 */
	fch_data_t *data = (fch_data_t *)mphf->data;
	CMPH_HASH h1_type = hash_get_type(data->h1);
	CMPH_HASH h2_type = hash_get_type(data->h2);
	return (sizeof(CMPH_ALGO) + hash_state_packed_size(h1_type) + hash_state_packed_size(h2_type) +
		4*sizeof(cmph_uint32) + 2*sizeof(cmph_float32) + sizeof(cmph_uint32)*(data->b));
}
@ -457,6 +511,31 @@ cmph_uint32 fch_packed_size(cmph_t *mphf)
*/
cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
{
	/*
	 * Evaluates a packed FCH function for key.
	 * Layout read: h1 type tag | packed h1 | h2 type tag | packed h2 |
	 * m | b | p1 | p2 | g.
	 * Returns (h2 + g[h1]) % m.
	 */
	register cmph_uint8 *h1_ptr = packed_mphf;
	register CMPH_HASH h1_type = *((cmph_uint32 *)h1_ptr);
	h1_ptr += 4; // skip the type tag; h1_ptr now addresses the packed h1 state

	register cmph_uint8 *h2_ptr = h1_ptr + hash_state_packed_size(h1_type);
	register CMPH_HASH h2_type = *((cmph_uint32 *)h2_ptr);
	h2_ptr += 4; // skip the type tag; h2_ptr now addresses the packed h2 state

	register cmph_uint32 *g_ptr = (cmph_uint32 *)(h2_ptr + hash_state_packed_size(h2_type));

	register cmph_uint32 m = *g_ptr++;

	register cmph_uint32 b = *g_ptr++;

	// p1 and p2 are decoded as integer words converted to float
	register cmph_float32 p1 = (cmph_float32)(*g_ptr);
	g_ptr++;

	register cmph_float32 p2 = (cmph_float32)(*g_ptr);
	g_ptr++; // g_ptr now addresses g[0]

	register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % m;
	register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % m;

	h1 = mixh10h11h12 (b, p1, p2, h1);
	return (h2 + g_ptr[h1]) % m;
}

View File

@ -124,28 +124,21 @@ void hash_state_destroy(hash_state_t *state)
return;
}
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed)
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* \param state points to the hash function
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
*
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* However, the hash function type must be packed outside.
*/
void hash_state_pack(hash_state_t *state, void *hash_packed)
{
cmph_uint32 * ptr = (cmph_uint32 *)hash_packed;
cmph_uint32 * ptr_size = ptr++;
// Reserve space for the hash function size
*ptr_size = 0;
// Pack the hash function type
*ptr++ = state->hashfunc;
switch (state->hashfunc)
{
case CMPH_HASH_JENKINS:
// pack the jenkins hash function
jenkins_state_pack((jenkins_state_t *)state, ptr);
*ptr_size = sizeof(cmph_uint32) + sizeof(CMPH_HASH) + jenkins_state_packed_size();
jenkins_state_pack((jenkins_state_t *)state, hash_packed);
break;
default:
assert(0);
@ -153,15 +146,15 @@ void hash_state_pack(hash_state_t *state, void *hash_packed)
return;
}
/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state);
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
* \brief Return the amount of space needed to pack a hash function.
* \param state points to a hash function
* \param hashfunc function type
* \return the size of the packed function or zero for failures
*/
cmph_uint32 hash_state_packed_size(hash_state_t *state)
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
{
cmph_uint32 size = sizeof(cmph_uint32) + sizeof(CMPH_HASH);
switch (state->hashfunc)
cmph_uint32 size = 0;
switch (hashfunc)
{
case CMPH_HASH_JENKINS:
size += jenkins_state_packed_size();
@ -172,23 +165,19 @@ cmph_uint32 hash_state_packed_size(hash_state_t *state)
return size;
}
/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
* \param hash_packed is a pointer to a contiguous memory area
* \param hashfunc is the type of the hash function packed in hash_packed
* \param key is a pointer to a key
* \param keylen is the key length
* \return an integer that represents a hash value of 32 bits.
*/
cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen)
{
register cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1);
register CMPH_HASH hashfunc = *ptr++;
switch (hashfunc)
{
case CMPH_HASH_JENKINS:
return jenkins_hash_packed(ptr, k, keylen);
return jenkins_hash_packed(hash_packed, k, keylen);
default:
assert(0);
}
@ -196,23 +185,30 @@ cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
return 0;
}
/** \fn hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
* \param hash_packed is a pointer to a contiguous memory area
* \param hashfunc is the type of the hash function packed in hash_packed
* \param k is a pointer to a key
* \param keylen is the key length
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
*/
void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
{
cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1);
CMPH_HASH hashfunc = *ptr++;
switch (hashfunc)
{
case CMPH_HASH_JENKINS:
jenkins_hash_vector_packed(ptr, k, keylen, hashes);
jenkins_hash_vector_packed(hash_packed, k, keylen, hashes);
break;
default:
assert(0);
}
}
/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
 * \brief Report which hash function a state object holds.
 * \param state is a pointer to a hash_state_t structure
 * \return the value stored in the hashfunc field of state
 */
CMPH_HASH hash_get_type(hash_state_t *state)
{
	/* The type is read straight from the state; nothing is validated or modified. */
	const CMPH_HASH type = state->hashfunc;
	return type;
}

View File

@ -35,31 +35,42 @@ void hash_state_destroy(hash_state_t *state);
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* \param state points to the hash function
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
*
* Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
* Note that the hash function type is not written by this function; the caller must pack it separately.
*/
void hash_state_pack(hash_state_t *state, void *hash_packed);
/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state);
* \brief Return the amount of space needed to pack a hash function.
* \param state points to a hash function
* \return the size of the packed function or zero for failures
*/
cmph_uint32 hash_state_packed_size(hash_state_t *state);
/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen);
/** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
* \param hash_packed is a pointer to a contiguous memory area
* \param hashfunc is the type of the hash function packed in hash_packed
* \param k is a pointer to a key
* \param keylen is the key length
* \return an integer that represents a hash value of 32 bits.
*/
cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen);
cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
/** \fn hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
/** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
* \brief Return the amount of space needed to pack a hash function.
* \param hashfunc function type
* \return the size of the packed function or zero for failures
*/
cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc);
/** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
* \param hash_packed is a pointer to a contiguous memory area
* \param hashfunc is the type of the hash function packed in hash_packed
* \param k is a pointer to a key
* \param keylen is the key length
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
*/
void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
/** \fn CMPH_HASH hash_get_type(hash_state_t *state);
* \param state is a pointer to a hash_state_t structure
* \return the hash function type pointed by state
*/
CMPH_HASH hash_get_type(hash_state_t *state);
#endif

View File

@ -241,6 +241,7 @@ int main(int argc, char **argv)
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c);
mphf = cmph_new(config);
cmph_config_destroy(config);
if (mphf == NULL)
{