*** empty log message ***
This commit is contained in:
parent
1f42de94f2
commit
d63806a90a
128
src/bdz.c
128
src/bdz.c
@ -214,7 +214,7 @@ static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_que
|
|||||||
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue);
|
static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue);
|
||||||
static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue);
|
static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue);
|
||||||
static void ranking(bdz_config_data_t *bdz);
|
static void ranking(bdz_config_data_t *bdz);
|
||||||
static cmph_uint32 rank(bdz_data_t *bdz, cmph_uint32 vertex);
|
static cmph_uint32 rank(cmph_uint8 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex);
|
||||||
|
|
||||||
bdz_config_data_t *bdz_config_new()
|
bdz_config_data_t *bdz_config_new()
|
||||||
{
|
{
|
||||||
@ -553,22 +553,22 @@ cmph_uint32 bdz_search_ph(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
|||||||
return vertex;
|
return vertex;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline cmph_uint32 rank(bdz_data_t *bdz, cmph_uint32 vertex)
|
static inline cmph_uint32 rank(cmph_uint8 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
|
||||||
{
|
{
|
||||||
cmph_uint32 index = vertex >> bdz->b;
|
register cmph_uint32 index = vertex >> b;
|
||||||
cmph_uint32 base_rank = bdz->ranktable[index];
|
register cmph_uint32 base_rank = ranktable[index];
|
||||||
cmph_uint32 beg_idx_v = index << bdz->b;
|
register cmph_uint32 beg_idx_v = index << b;
|
||||||
cmph_uint32 beg_idx_b = beg_idx_v >> 2;
|
register cmph_uint32 beg_idx_b = beg_idx_v >> 2;
|
||||||
cmph_uint32 end_idx_b = vertex >> 2;
|
register cmph_uint32 end_idx_b = vertex >> 2;
|
||||||
while(beg_idx_b < end_idx_b)
|
while(beg_idx_b < end_idx_b)
|
||||||
{
|
{
|
||||||
base_rank += bdz_lookup_table[*(bdz->g + beg_idx_b++)];
|
base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
|
||||||
|
|
||||||
}
|
}
|
||||||
beg_idx_v = beg_idx_b << 2;
|
beg_idx_v = beg_idx_b << 2;
|
||||||
while(beg_idx_v < vertex)
|
while(beg_idx_v < vertex)
|
||||||
{
|
{
|
||||||
if(GETVALUE(bdz->g, beg_idx_v) != UNASSIGNED) base_rank++;
|
if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
|
||||||
beg_idx_v++;
|
beg_idx_v++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -577,15 +577,15 @@ static inline cmph_uint32 rank(bdz_data_t *bdz, cmph_uint32 vertex)
|
|||||||
|
|
||||||
cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
bdz_data_t *bdz = mphf->data;
|
register cmph_uint32 vertex;
|
||||||
|
register bdz_data_t *bdz = mphf->data;
|
||||||
cmph_uint32 hl[3];
|
cmph_uint32 hl[3];
|
||||||
hash_vector(bdz->hl, key, keylen, hl);
|
hash_vector(bdz->hl, key, keylen, hl);
|
||||||
cmph_uint32 vertex;
|
|
||||||
hl[0] = hl[0] % bdz->r;
|
hl[0] = hl[0] % bdz->r;
|
||||||
hl[1] = hl[1] % bdz->r + bdz->r;
|
hl[1] = hl[1] % bdz->r + bdz->r;
|
||||||
hl[2] = hl[2] % bdz->r + (bdz->r << 1);
|
hl[2] = hl[2] % bdz->r + (bdz->r << 1);
|
||||||
vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
|
vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
|
||||||
return rank(bdz, vertex);
|
return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -598,3 +598,107 @@ void bdz_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
register cmph_uint32 vertex;
|
||||||
|
register bdz_data_t *bdz = mphf->data;
|
||||||
|
cmph_uint32 hl[3];
|
||||||
|
|
||||||
|
hash_vector(bdz->hl, key, keylen, hl);
|
||||||
|
memcpy(fingerprint, hl, sizeof(hl));
|
||||||
|
hl[0] = hl[0] % bdz->r;
|
||||||
|
hl[1] = hl[1] % bdz->r + bdz->r;
|
||||||
|
hl[2] = hl[2] % bdz->r + (bdz->r << 1);
|
||||||
|
vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
|
||||||
|
return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bdz_data_t *data = (bdz_data_t *)mphf->data;
|
||||||
|
cmph_uint32 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing hl
|
||||||
|
hash_state_pack(data->hl, ptr);
|
||||||
|
|
||||||
|
|
||||||
|
ptr += (hash_state_packed_size(data->hl) >> 2); // (hash_state_packed_size(data->hl) / 4);
|
||||||
|
|
||||||
|
// packing r
|
||||||
|
*ptr++ = data->r;
|
||||||
|
|
||||||
|
// packing ranktablesize
|
||||||
|
*ptr++ = data->ranktablesize;
|
||||||
|
|
||||||
|
// packing ranktable
|
||||||
|
memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
|
||||||
|
ptr += data->ranktablesize;
|
||||||
|
|
||||||
|
cmph_uint8 * ptr8 = (cmph_uint8 *) ptr;
|
||||||
|
|
||||||
|
// packing b
|
||||||
|
*ptr8++ = data->b;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr8, data->g, sizeof(cmph_uint8)*((data->n >> 2) +1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bdz_data_t *data = (bdz_data_t *)mphf->data;
|
||||||
|
return (sizeof(CMPH_ALGO) + hash_state_packed_size(data->hl) + (sizeof(cmph_uint32) << 1) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*((data->n >> 2) +1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint32 vertex;
|
||||||
|
register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint32 hl_size = *hl_ptr;
|
||||||
|
register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4
|
||||||
|
|
||||||
|
register cmph_uint32 r = *ptr++;
|
||||||
|
register cmph_uint32 ranktablesize = *ptr++;
|
||||||
|
register cmph_uint32 *ranktable = ptr;
|
||||||
|
ptr += ranktablesize;
|
||||||
|
|
||||||
|
register cmph_uint8 * g = (cmph_uint8 *)ptr;
|
||||||
|
register cmph_uint8 b = *g++;
|
||||||
|
|
||||||
|
cmph_uint32 hl[3];
|
||||||
|
hash_vector_packed(hl_ptr, key, keylen, hl);
|
||||||
|
hl[0] = hl[0] % r;
|
||||||
|
hl[1] = hl[1] % r + r;
|
||||||
|
hl[2] = hl[2] % r + (r << 1);
|
||||||
|
vertex = hl[(GETVALUE(g, hl[0]) + GETVALUE(g, hl[1]) + GETVALUE(g, hl[2])) % 3];
|
||||||
|
return rank(b, ranktable, g, vertex);
|
||||||
|
}
|
||||||
|
38
src/bdz.h
38
src/bdz.h
@ -16,4 +16,42 @@ void bdz_load(FILE *f, cmph_t *mphf);
|
|||||||
int bdz_dump(cmph_t *mphf, FILE *f);
|
int bdz_dump(cmph_t *mphf, FILE *f);
|
||||||
void bdz_destroy(cmph_t *mphf);
|
void bdz_destroy(cmph_t *mphf);
|
||||||
cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
121
src/bdz_ph.c
121
src/bdz_ph.c
@ -478,21 +478,23 @@ void bdz_ph_load(FILE *f, cmph_t *mphf)
|
|||||||
bdz_ph->g = (cmph_uint8 *)calloc((bdz_ph->n/5)+1, sizeof(cmph_uint8));
|
bdz_ph->g = (cmph_uint8 *)calloc((bdz_ph->n/5)+1, sizeof(cmph_uint8));
|
||||||
fread(bdz_ph->g, ((bdz_ph->n/5)+1)*sizeof(cmph_uint8), 1, f);
|
fread(bdz_ph->g, ((bdz_ph->n/5)+1)*sizeof(cmph_uint8), 1, f);
|
||||||
|
|
||||||
#ifdef DEBUG
|
/* #ifdef DEBUG
|
||||||
|
cmph_uint32 i;
|
||||||
fprintf(stderr, "G: ");
|
fprintf(stderr, "G: ");
|
||||||
for (i = 0; i < bdz_ph->n; ++i) fprintf(stderr, "%u ", GETVALUE(bdz_ph->g,i));
|
for (i = 0; i < bdz_ph->n; ++i) fprintf(stderr, "%u ", GETVALUE(bdz_ph->g,i));
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
bdz_ph_data_t *bdz_ph = mphf->data;
|
register bdz_ph_data_t *bdz_ph = mphf->data;
|
||||||
cmph_uint32 hl[3];
|
cmph_uint32 hl[3];
|
||||||
cmph_uint8 byte0, byte1, byte2;
|
register cmph_uint8 byte0, byte1, byte2;
|
||||||
cmph_uint32 vertex;
|
register cmph_uint32 vertex;
|
||||||
|
|
||||||
hash_vector(bdz_ph->hl, key, keylen,hl);
|
hash_vector(bdz_ph->hl, key, keylen,hl);
|
||||||
hl[0] = hl[0] % bdz_ph->r;
|
hl[0] = hl[0] % bdz_ph->r;
|
||||||
@ -520,3 +522,114 @@ void bdz_ph_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
register bdz_ph_data_t *bdz_ph = mphf->data;
|
||||||
|
cmph_uint32 hl[3];
|
||||||
|
register cmph_uint8 byte0, byte1, byte2;
|
||||||
|
register cmph_uint32 vertex;
|
||||||
|
|
||||||
|
hash_vector(bdz_ph->hl, key, keylen,hl);
|
||||||
|
memcpy(fingerprint, hl, sizeof(hl));
|
||||||
|
|
||||||
|
hl[0] = hl[0] % bdz_ph->r;
|
||||||
|
hl[1] = hl[1] % bdz_ph->r + bdz_ph->r;
|
||||||
|
hl[2] = hl[2] % bdz_ph->r + (bdz_ph->r << 1);
|
||||||
|
|
||||||
|
byte0 = bdz_ph->g[hl[0]/5];
|
||||||
|
byte1 = bdz_ph->g[hl[1]/5];
|
||||||
|
byte2 = bdz_ph->g[hl[2]/5];
|
||||||
|
|
||||||
|
byte0 = lookup_table[hl[0]%5][byte0];
|
||||||
|
byte1 = lookup_table[hl[1]%5][byte1];
|
||||||
|
byte2 = lookup_table[hl[2]%5][byte2];
|
||||||
|
vertex = hl[(byte0 + byte1 + byte2)%3];
|
||||||
|
|
||||||
|
return vertex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_ph_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||||
|
cmph_uint32 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing hl
|
||||||
|
hash_state_pack(data->hl, ptr);
|
||||||
|
|
||||||
|
|
||||||
|
ptr += (hash_state_packed_size(data->hl) >> 2); // (hash_state_packed_size(data->hl) / 4);
|
||||||
|
|
||||||
|
// packing r
|
||||||
|
*ptr++ = data->r;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint8)*((data->n/5)+1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bdz_ph_data_t *data = (bdz_ph_data_t *)mphf->data;
|
||||||
|
return (sizeof(CMPH_ALGO) + hash_state_packed_size(data->hl) + sizeof(cmph_uint32) + sizeof(cmph_uint8)*((data->n/5)+1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
|
||||||
|
register cmph_uint32 *hl_ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint32 hl_size = *hl_ptr;
|
||||||
|
register cmph_uint32 *ptr = hl_ptr + (hl_size >> 2); // h2_ptr + h2_size/4
|
||||||
|
|
||||||
|
register cmph_uint32 r = *ptr++;
|
||||||
|
register cmph_uint8 * g = (cmph_uint8 *)ptr;
|
||||||
|
|
||||||
|
cmph_uint32 hl[3];
|
||||||
|
register cmph_uint8 byte0, byte1, byte2;
|
||||||
|
register cmph_uint32 vertex;
|
||||||
|
|
||||||
|
hash_vector_packed(hl_ptr, key, keylen, hl);
|
||||||
|
|
||||||
|
hl[0] = hl[0] % r;
|
||||||
|
hl[1] = hl[1] % r + r;
|
||||||
|
hl[2] = hl[2] % r + (r << 1);
|
||||||
|
|
||||||
|
byte0 = g[hl[0]/5];
|
||||||
|
byte1 = g[hl[1]/5];
|
||||||
|
byte2 = g[hl[2]/5];
|
||||||
|
|
||||||
|
byte0 = lookup_table[hl[0]%5][byte0];
|
||||||
|
byte1 = lookup_table[hl[1]%5][byte1];
|
||||||
|
byte2 = lookup_table[hl[2]%5][byte2];
|
||||||
|
vertex = hl[(byte0 + byte1 + byte2)%3];
|
||||||
|
|
||||||
|
return vertex;
|
||||||
|
}
|
||||||
|
38
src/bdz_ph.h
38
src/bdz_ph.h
@ -15,4 +15,42 @@ void bdz_ph_load(FILE *f, cmph_t *mphf);
|
|||||||
int bdz_ph_dump(cmph_t *mphf, FILE *f);
|
int bdz_ph_dump(cmph_t *mphf, FILE *f);
|
||||||
void bdz_ph_destroy(cmph_t *mphf);
|
void bdz_ph_destroy(cmph_t *mphf);
|
||||||
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
93
src/bmz.c
93
src/bmz.c
@ -538,3 +538,96 @@ void bmz_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
bmz_data_t *bmz = mphf->data;
|
||||||
|
cmph_uint32 h1, h2;
|
||||||
|
|
||||||
|
hash_vector(bmz->hashes[0], key, keylen, fingerprint);
|
||||||
|
h1 = fingerprint[2] % bmz->n;
|
||||||
|
|
||||||
|
hash_vector(bmz->hashes[1], key, keylen, fingerprint);
|
||||||
|
h2 = fingerprint[2] % bmz->n;
|
||||||
|
|
||||||
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
|
if (h1 == h2 && ++h2 > bmz->n) h2 = 0;
|
||||||
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m);
|
||||||
|
return bmz->g[h1] + bmz->g[h2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||||
|
cmph_uint32 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->hashes[0], ptr);
|
||||||
|
|
||||||
|
ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->hashes[1], ptr);
|
||||||
|
ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4);
|
||||||
|
|
||||||
|
// packing n
|
||||||
|
*ptr++ = data->n;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bmz_data_t *data = (bmz_data_t *)mphf->data;
|
||||||
|
return (sizeof(CMPH_ALGO) + 2*hash_state_packed_size(data->hashes[0]) + sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint32 h1_size = *h1_ptr;
|
||||||
|
|
||||||
|
register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4
|
||||||
|
register cmph_uint32 h2_size = *h2_ptr;
|
||||||
|
|
||||||
|
register cmph_uint32 *g_ptr = h2_ptr + (h2_size >> 2); // h2_ptr + h2_size/4
|
||||||
|
|
||||||
|
register cmph_uint32 n = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint32 h1 = hash_packed(h1_ptr, key, keylen) % n;
|
||||||
|
register cmph_uint32 h2 = hash_packed(h2_ptr, key, keylen) % n;
|
||||||
|
|
||||||
|
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||||
|
|
||||||
|
return (g_ptr[h1] + g_ptr[h2]);
|
||||||
|
}
|
||||||
|
38
src/bmz.h
38
src/bmz.h
@ -15,4 +15,42 @@ void bmz_load(FILE *f, cmph_t *mphf);
|
|||||||
int bmz_dump(cmph_t *mphf, FILE *f);
|
int bmz_dump(cmph_t *mphf, FILE *f);
|
||||||
void bmz_destroy(cmph_t *mphf);
|
void bmz_destroy(cmph_t *mphf);
|
||||||
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
94
src/bmz8.c
94
src/bmz8.c
@ -547,3 +547,97 @@ void bmz8_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. Depending on the application, just a few bits can be enough, since an mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
bmz8_data_t *bmz8 = mphf->data;
|
||||||
|
cmph_uint8 h1, h2;
|
||||||
|
|
||||||
|
hash_vector(bmz8->hashes[0], key, keylen, fingerprint);
|
||||||
|
h1 = fingerprint[2] % bmz8->n;
|
||||||
|
|
||||||
|
hash_vector(bmz8->hashes[1], key, keylen, fingerprint);
|
||||||
|
h2 = fingerprint[2] % bmz8->n;
|
||||||
|
|
||||||
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
|
if (h1 == h2 && ++h2 > bmz8->n) h2 = 0;
|
||||||
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz8->g[h1], bmz8->g[h2], bmz8->m);
|
||||||
|
return bmz8->g[h1] + bmz8->g[h2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz8_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
|
||||||
|
cmph_uint8 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->hashes[0], ptr);
|
||||||
|
|
||||||
|
ptr += hash_state_packed_size(data->hashes[0]);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->hashes[1], ptr);
|
||||||
|
ptr += hash_state_packed_size(data->hashes[1]);
|
||||||
|
|
||||||
|
// packing n
|
||||||
|
*ptr++ = data->n;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint8)*data->n);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz8_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
bmz8_data_t *data = (bmz8_data_t *)mphf->data;
|
||||||
|
return (sizeof(CMPH_ALGO) + 2*hash_state_packed_size(data->hashes[0]) + sizeof(cmph_uint8) + sizeof(cmph_uint8)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint32 h1_size = *h1_ptr;
|
||||||
|
|
||||||
|
register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4
|
||||||
|
register cmph_uint32 h2_size = *h2_ptr;
|
||||||
|
|
||||||
|
register cmph_uint8 *g_ptr = (cmph_uint8 *)(h2_ptr + (h2_size >> 2)); // h2_ptr + h2_size/4
|
||||||
|
|
||||||
|
register cmph_uint8 n = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint8 h1 = hash_packed(h1_ptr, key, keylen) % n;
|
||||||
|
register cmph_uint8 h2 = hash_packed(h2_ptr, key, keylen) % n;
|
||||||
|
|
||||||
|
if (h1 == h2 && ++h2 > n) h2 = 0;
|
||||||
|
|
||||||
|
return (g_ptr[h1] + g_ptr[h2]);
|
||||||
|
}
|
||||||
|
38
src/bmz8.h
38
src/bmz8.h
@ -15,4 +15,42 @@ void bmz8_load(FILE *f, cmph_t *mphf);
|
|||||||
int bmz8_dump(cmph_t *mphf, FILE *f);
|
int bmz8_dump(cmph_t *mphf, FILE *f);
|
||||||
void bmz8_destroy(cmph_t *mphf);
|
void bmz8_destroy(cmph_t *mphf);
|
||||||
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void bmz8_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 bmz8_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
49
src/brz.c
49
src/brz.c
@ -701,3 +701,52 @@ void brz_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be
|
||||||
|
* a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void brz_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
38
src/brz.h
38
src/brz.h
@ -20,4 +20,42 @@ void brz_load(FILE *f, cmph_t *mphf);
|
|||||||
int brz_dump(cmph_t *mphf, FILE *f);
|
int brz_dump(cmph_t *mphf, FILE *f);
|
||||||
void brz_destroy(cmph_t *mphf);
|
void brz_destroy(cmph_t *mphf);
|
||||||
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void brz_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
101
src/chm.c
101
src/chm.c
@ -292,3 +292,104 @@ void chm_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be
|
||||||
|
* a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
chm_data_t *chm = mphf->data;
|
||||||
|
cmph_uint32 h1, h2;
|
||||||
|
|
||||||
|
hash_vector(chm->hashes[0], key, keylen, fingerprint);
|
||||||
|
h1 = fingerprint[2] % chm->n;
|
||||||
|
|
||||||
|
hash_vector(chm->hashes[1], key, keylen, fingerprint);
|
||||||
|
h2 = fingerprint[2] % chm->n;
|
||||||
|
|
||||||
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
|
if (h1 == h2 && ++h2 >= chm->n) h2 = 0;
|
||||||
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, chm->g[h1], chm->g[h2], chm->m);
|
||||||
|
return (chm->g[h1] + chm->g[h2]) % chm->m;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void chm_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||||
|
cmph_uint32 * ptr = packed_mphf;
|
||||||
|
|
||||||
|
// packing h1
|
||||||
|
hash_state_pack(data->hashes[0], ptr);
|
||||||
|
|
||||||
|
ptr += (hash_state_packed_size(data->hashes[0]) >> 2); // (hash_state_packed_size(data->hashes[0]) / 4);
|
||||||
|
|
||||||
|
// packing h2
|
||||||
|
hash_state_pack(data->hashes[1], ptr);
|
||||||
|
ptr += (hash_state_packed_size(data->hashes[1]) >> 2); // (hash_state_packed_size(data->hashes[1]) / 4);
|
||||||
|
|
||||||
|
// packing n
|
||||||
|
*ptr++ = data->n;
|
||||||
|
|
||||||
|
// packing m
|
||||||
|
*ptr++ = data->m;
|
||||||
|
|
||||||
|
// packing g
|
||||||
|
memcpy(ptr, data->g, sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
chm_data_t *data = (chm_data_t *)mphf->data;
|
||||||
|
return (sizeof(CMPH_ALGO) + 2*hash_state_packed_size(data->hashes[0]) + 2*sizeof(cmph_uint32) + sizeof(cmph_uint32)*data->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint32 *h1_ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
register cmph_uint32 h1_size = *h1_ptr;
|
||||||
|
|
||||||
|
// fprintf(stderr, "h1_size:%u\n", h1_size);
|
||||||
|
|
||||||
|
register cmph_uint32 *h2_ptr = h1_ptr + (h1_size >> 2); // h1_ptr + h1_size/4
|
||||||
|
register cmph_uint32 h2_size = *h2_ptr;
|
||||||
|
// fprintf(stderr, "h2_size:%u\n", h2_size);
|
||||||
|
|
||||||
|
register cmph_uint32 *g_ptr = h2_ptr + (h2_size >> 2); // h2_ptr + h2_size/4
|
||||||
|
|
||||||
|
register cmph_uint32 n = *g_ptr++;
|
||||||
|
register cmph_uint32 m = *g_ptr++;
|
||||||
|
|
||||||
|
register cmph_uint32 h1 = hash_packed(h1_ptr, key, keylen) % n;
|
||||||
|
register cmph_uint32 h2 = hash_packed(h2_ptr, key, keylen) % n;
|
||||||
|
DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2);
|
||||||
|
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||||
|
DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, g_ptr[h1], g_ptr[h2], m);
|
||||||
|
return (g_ptr[h1] + g_ptr[h2]) % m;
|
||||||
|
}
|
||||||
|
38
src/chm.h
38
src/chm.h
@ -15,4 +15,42 @@ void chm_load(FILE *f, cmph_t *mphf);
|
|||||||
int chm_dump(cmph_t *mphf, FILE *f);
|
int chm_dump(cmph_t *mphf, FILE *f);
|
||||||
void chm_destroy(cmph_t *mphf);
|
void chm_destroy(cmph_t *mphf);
|
||||||
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void chm_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
151
src/cmph.c
151
src/cmph.c
@ -520,6 +520,54 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be
|
||||||
|
* a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
DEBUGP("mphf algorithm: %u \n", mphf->algo);
|
||||||
|
switch(mphf->algo)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
return chm_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
DEBUGP("bmz algorithm search\n");
|
||||||
|
return bmz_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
DEBUGP("bmz8 algorithm search\n");
|
||||||
|
return bmz8_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
DEBUGP("brz algorithm search\n");
|
||||||
|
return brz_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
DEBUGP("fch algorithm search\n");
|
||||||
|
return fch_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
DEBUGP("bdz algorithm search\n");
|
||||||
|
return bdz_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
DEBUGP("bdz_ph algorithm search\n");
|
||||||
|
return bdz_ph_search_fingerprint(mphf, key, keylen, fingerprint);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
assert(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cmph_uint32 cmph_size(cmph_t *mphf)
|
cmph_uint32 cmph_size(cmph_t *mphf)
|
||||||
{
|
{
|
||||||
return mphf->size;
|
return mphf->size;
|
||||||
@ -556,3 +604,106 @@ void cmph_destroy(cmph_t *mphf)
|
|||||||
assert(0);
|
assert(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void cmph_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
// packing algorithm type to be used in cmph.c
|
||||||
|
cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
|
||||||
|
*ptr++ = mphf->algo;
|
||||||
|
DEBUGP("mphf->algo = %u\n", mphf->algo);
|
||||||
|
switch(mphf->algo)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
chm_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
bmz_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
bmz8_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
brz_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
fch_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
bdz_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
bdz_ph_pack(mphf, ptr);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
switch(mphf->algo)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
return chm_packed_size(mphf);
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
return bmz_packed_size(mphf);
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
return bmz8_packed_size(mphf);
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
return brz_packed_size(mphf);
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
return fch_packed_size(mphf);
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
return bdz_packed_size(mphf);
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
return bdz_ph_packed_size(mphf);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return 0; // FAILURE
|
||||||
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 cmph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
|
||||||
|
// fprintf(stderr, "algo:%u\n", *ptr);
|
||||||
|
switch(*ptr)
|
||||||
|
{
|
||||||
|
case CMPH_CHM:
|
||||||
|
return chm_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BMZ: /* included -- Fabiano */
|
||||||
|
return bmz_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BMZ8: /* included -- Fabiano */
|
||||||
|
return bmz8_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BRZ: /* included -- Fabiano */
|
||||||
|
return brz_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_FCH: /* included -- Fabiano */
|
||||||
|
return fch_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BDZ: /* included -- Fabiano */
|
||||||
|
return bdz_search_packed(++ptr, key, keylen);
|
||||||
|
case CMPH_BDZ_PH: /* included -- Fabiano */
|
||||||
|
return bdz_ph_search_packed(++ptr, key, keylen);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return 0; // FAILURE
|
||||||
|
}
|
||||||
|
47
src/cmph.h
47
src/cmph.h
@ -51,7 +51,31 @@ void cmph_config_destroy(cmph_config_t *mph);
|
|||||||
|
|
||||||
/** Hash API **/
|
/** Hash API **/
|
||||||
cmph_t *cmph_new(cmph_config_t *mph);
|
cmph_t *cmph_new(cmph_config_t *mph);
|
||||||
|
|
||||||
|
/** cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Computes the mphf value.
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
|
||||||
cmph_uint32 cmph_size(cmph_t *mphf);
|
cmph_uint32 cmph_size(cmph_t *mphf);
|
||||||
void cmph_destroy(cmph_t *mphf);
|
void cmph_destroy(cmph_t *mphf);
|
||||||
|
|
||||||
@ -59,6 +83,29 @@ void cmph_destroy(cmph_t *mphf);
|
|||||||
int cmph_dump(cmph_t *mphf, FILE *f);
|
int cmph_dump(cmph_t *mphf, FILE *f);
|
||||||
cmph_t *cmph_load(FILE *f);
|
cmph_t *cmph_load(FILE *f);
|
||||||
|
|
||||||
|
/** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void cmph_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
50
src/fch.c
50
src/fch.c
@ -410,3 +410,53 @@ void fch_destroy(cmph_t *mphf)
|
|||||||
free(data);
|
free(data);
|
||||||
free(mphf);
|
free(mphf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be
|
||||||
|
* a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void fch_pack(cmph_t *mphf, void *packed_mphf)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_packed_size(cmph_t *mphf)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** cmph_uint32 fch_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key legth in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
38
src/fch.h
38
src/fch.h
@ -21,4 +21,42 @@ void fch_load(FILE *f, cmph_t *mphf);
|
|||||||
int fch_dump(cmph_t *mphf, FILE *f);
|
int fch_dump(cmph_t *mphf, FILE *f);
|
||||||
void fch_destroy(cmph_t *mphf);
|
void fch_destroy(cmph_t *mphf);
|
||||||
cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
* \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., fingerprint should be a preallocated area to fit three 4-byte integers).
|
||||||
|
* \param mphf pointer to the resulting function
|
||||||
|
* \param key is the key to be hashed
|
||||||
|
* \param keylen is the key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*
|
||||||
|
* Computes the mphf value and a fingerprint of 12 bytes. The fingerprint pointer should be
|
||||||
|
* a preallocated area to fit three 4-byte integers. You don't need to use all the 12 bytes
|
||||||
|
* as fingerprint. According to the application, just few bits can be enough, once mphf does
|
||||||
|
* not allow collisions for the keys previously known.
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint);
|
||||||
|
|
||||||
|
/** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
* \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
|
||||||
|
* \param mphf pointer to the resulting mphf
|
||||||
|
* \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
|
||||||
|
*/
|
||||||
|
void fch_pack(cmph_t *mphf, void *packed_mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
|
||||||
|
* \brief Return the amount of space needed to pack mphf.
|
||||||
|
* \param mphf pointer to a mphf
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_packed_size(cmph_t *mphf);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
* \brief Use the packed mphf to do a search.
|
||||||
|
* \param packed_mphf pointer to the packed mphf
|
||||||
|
* \param key key to be hashed
|
||||||
|
* \param keylen key length in bytes
|
||||||
|
* \return The mphf value
|
||||||
|
*/
|
||||||
|
cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
95
src/hash.c
95
src/hash.c
@ -43,7 +43,7 @@ void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_
|
|||||||
switch (state->hashfunc)
|
switch (state->hashfunc)
|
||||||
{
|
{
|
||||||
case CMPH_HASH_JENKINS:
|
case CMPH_HASH_JENKINS:
|
||||||
jenkins_hash_vector((jenkins_state_t *)state, key, keylen, hashes);
|
jenkins_hash_vector_((jenkins_state_t *)state, key, keylen, hashes);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -123,3 +123,96 @@ void hash_state_destroy(hash_state_t *state)
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
|
||||||
|
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||||
|
* \param state points to the hash function
|
||||||
|
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||||
|
*/
|
||||||
|
void hash_state_pack(hash_state_t *state, void *hash_packed)
|
||||||
|
{
|
||||||
|
cmph_uint32 * ptr = (cmph_uint32 *)hash_packed;
|
||||||
|
cmph_uint32 * ptr_size = ptr++;
|
||||||
|
|
||||||
|
// Reserve space for the hash function size
|
||||||
|
*ptr_size = 0;
|
||||||
|
|
||||||
|
// Pack the hash function type
|
||||||
|
*ptr++ = state->hashfunc;
|
||||||
|
|
||||||
|
switch (state->hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
// pack the jenkins hash function
|
||||||
|
jenkins_state_pack((jenkins_state_t *)state, ptr);
|
||||||
|
*ptr_size = sizeof(cmph_uint32) + sizeof(CMPH_HASH) + jenkins_state_packed_size();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state);
|
||||||
|
* \brief Return the amount of space needed to pack a hash function.
|
||||||
|
* \param state points to a hash function
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 hash_state_packed_size(hash_state_t *state)
|
||||||
|
{
|
||||||
|
cmph_uint32 size = sizeof(cmph_uint32) + sizeof(CMPH_HASH);
|
||||||
|
switch (state->hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
size += jenkins_state_packed_size();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
|
||||||
|
* \param hash_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
register cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1);
|
||||||
|
|
||||||
|
register CMPH_HASH hashfunc = *ptr++;
|
||||||
|
|
||||||
|
switch (hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
return jenkins_hash_packed(ptr, k, keylen);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
assert(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param hash_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
|
{
|
||||||
|
cmph_uint32 * ptr = (((cmph_uint32 *) hash_packed) + 1);
|
||||||
|
|
||||||
|
CMPH_HASH hashfunc = *ptr++;
|
||||||
|
switch (hashfunc)
|
||||||
|
{
|
||||||
|
case CMPH_HASH_JENKINS:
|
||||||
|
jenkins_hash_vector_packed(ptr, k, keylen, hashes);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
31
src/hash.h
31
src/hash.h
@ -31,4 +31,35 @@ hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen);
|
|||||||
|
|
||||||
void hash_state_destroy(hash_state_t *state);
|
void hash_state_destroy(hash_state_t *state);
|
||||||
|
|
||||||
|
/** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
|
||||||
|
* \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
|
||||||
|
* \param state points to the hash function
|
||||||
|
* \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
|
||||||
|
*/
|
||||||
|
void hash_state_pack(hash_state_t *state, void *hash_packed);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 hash_state_packed_size(hash_state_t *state);
|
||||||
|
* \brief Return the amount of space needed to pack a hash function.
|
||||||
|
* \param state points to a hash function
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 hash_state_packed_size(hash_state_t *state);
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
* \param hash_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 hash_packed(void *hash_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param hash_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void hash_vector_packed(void *hash_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -96,78 +96,16 @@ void jenkins_state_destroy(jenkins_state_t *state)
|
|||||||
free(state);
|
free(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
|
|
||||||
|
inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
{
|
{
|
||||||
cmph_uint32 a, b, c;
|
register cmph_uint32 len, length;
|
||||||
cmph_uint32 len, length;
|
|
||||||
|
|
||||||
/* Set up the internal state */
|
|
||||||
length = keylen;
|
|
||||||
len = length;
|
|
||||||
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
|
||||||
c = state->seed; /* the previous hash value - seed in our case */
|
|
||||||
|
|
||||||
/*---------------------------------------- handle most of the key */
|
|
||||||
while (len >= 12)
|
|
||||||
{
|
|
||||||
a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
|
|
||||||
b += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
|
|
||||||
c += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
|
|
||||||
mix(a,b,c);
|
|
||||||
k += 12; len -= 12;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*------------------------------------- handle the last 11 bytes */
|
|
||||||
c += length;
|
|
||||||
switch(len) /* all the case statements fall through */
|
|
||||||
{
|
|
||||||
case 11:
|
|
||||||
c +=((cmph_uint32)k[10]<<24);
|
|
||||||
case 10:
|
|
||||||
c +=((cmph_uint32)k[9]<<16);
|
|
||||||
case 9 :
|
|
||||||
c +=((cmph_uint32)k[8]<<8);
|
|
||||||
/* the first byte of c is reserved for the length */
|
|
||||||
case 8 :
|
|
||||||
b +=((cmph_uint32)k[7]<<24);
|
|
||||||
case 7 :
|
|
||||||
b +=((cmph_uint32)k[6]<<16);
|
|
||||||
case 6 :
|
|
||||||
b +=((cmph_uint32)k[5]<<8);
|
|
||||||
case 5 :
|
|
||||||
b +=k[4];
|
|
||||||
case 4 :
|
|
||||||
a +=((cmph_uint32)k[3]<<24);
|
|
||||||
case 3 :
|
|
||||||
a +=((cmph_uint32)k[2]<<16);
|
|
||||||
case 2 :
|
|
||||||
a +=((cmph_uint32)k[1]<<8);
|
|
||||||
case 1 :
|
|
||||||
a +=k[0];
|
|
||||||
/* case 0: nothing left to add */
|
|
||||||
}
|
|
||||||
|
|
||||||
mix(a,b,c);
|
|
||||||
|
|
||||||
/*-------------------------------------------- report the result */
|
|
||||||
|
|
||||||
//c = (c & hashmask(state->size));
|
|
||||||
//c = (c >= state->size) ? c ^ state->size: c;
|
|
||||||
|
|
||||||
//state->last_hash = c; Do not update last_hash because we use a fixed
|
|
||||||
//seed
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
|
||||||
{
|
|
||||||
cmph_uint32 len, length;
|
|
||||||
|
|
||||||
/* Set up the internal state */
|
/* Set up the internal state */
|
||||||
length = keylen;
|
length = keylen;
|
||||||
len = length;
|
len = length;
|
||||||
hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
hashes[0] = hashes[1] = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||||
hashes[2] = state->seed; /* the previous hash value - seed in our case */
|
hashes[2] = seed; /* the previous hash value - seed in our case */
|
||||||
|
|
||||||
/*---------------------------------------- handle most of the key */
|
/*---------------------------------------- handle most of the key */
|
||||||
while (len >= 12)
|
while (len >= 12)
|
||||||
@ -212,6 +150,73 @@ void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keyl
|
|||||||
mix(hashes[0],hashes[1],hashes[2]);
|
mix(hashes[0],hashes[1],hashes[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
cmph_uint32 hashes[3];
|
||||||
|
__jenkins_hash_vector(state->seed, k, keylen, hashes);
|
||||||
|
return hashes[2];
|
||||||
|
/* cmph_uint32 a, b, c;
|
||||||
|
cmph_uint32 len, length;
|
||||||
|
|
||||||
|
// Set up the internal state
|
||||||
|
length = keylen;
|
||||||
|
len = length;
|
||||||
|
a = b = 0x9e3779b9; // the golden ratio; an arbitrary value
|
||||||
|
c = state->seed; // the previous hash value - seed in our case
|
||||||
|
|
||||||
|
// handle most of the key
|
||||||
|
while (len >= 12)
|
||||||
|
{
|
||||||
|
a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
|
||||||
|
b += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
|
||||||
|
c += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
|
||||||
|
mix(a,b,c);
|
||||||
|
k += 12; len -= 12;
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle the last 11 bytes
|
||||||
|
c += length;
|
||||||
|
switch(len) /// all the case statements fall through
|
||||||
|
{
|
||||||
|
case 11:
|
||||||
|
c +=((cmph_uint32)k[10]<<24);
|
||||||
|
case 10:
|
||||||
|
c +=((cmph_uint32)k[9]<<16);
|
||||||
|
case 9 :
|
||||||
|
c +=((cmph_uint32)k[8]<<8);
|
||||||
|
// the first byte of c is reserved for the length
|
||||||
|
case 8 :
|
||||||
|
b +=((cmph_uint32)k[7]<<24);
|
||||||
|
case 7 :
|
||||||
|
b +=((cmph_uint32)k[6]<<16);
|
||||||
|
case 6 :
|
||||||
|
b +=((cmph_uint32)k[5]<<8);
|
||||||
|
case 5 :
|
||||||
|
b +=k[4];
|
||||||
|
case 4 :
|
||||||
|
a +=((cmph_uint32)k[3]<<24);
|
||||||
|
case 3 :
|
||||||
|
a +=((cmph_uint32)k[2]<<16);
|
||||||
|
case 2 :
|
||||||
|
a +=((cmph_uint32)k[1]<<8);
|
||||||
|
case 1 :
|
||||||
|
a +=k[0];
|
||||||
|
// case 0: nothing left to add
|
||||||
|
}
|
||||||
|
|
||||||
|
mix(a,b,c);
|
||||||
|
|
||||||
|
/// report the result
|
||||||
|
|
||||||
|
return c;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
|
{
|
||||||
|
__jenkins_hash_vector(state->seed, k, keylen, hashes);
|
||||||
|
}
|
||||||
|
|
||||||
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
|
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
{
|
{
|
||||||
*buflen = sizeof(cmph_uint32);
|
*buflen = sizeof(cmph_uint32);
|
||||||
@ -242,3 +247,51 @@ jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
|
|||||||
DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
|
DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||||
|
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
|
||||||
|
* \param state points to the jenkins function
|
||||||
|
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
|
||||||
|
*/
|
||||||
|
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
|
||||||
|
{
|
||||||
|
if (state && jenkins_packed)
|
||||||
|
{
|
||||||
|
memcpy(jenkins_packed, &(state->seed), sizeof(cmph_uint32));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_state_packed_size();
|
||||||
|
* \brief Return the amount of space needed to pack a jenkins function.
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_state_packed_size()
|
||||||
|
{
|
||||||
|
return sizeof(cmph_uint32);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
|
||||||
|
{
|
||||||
|
cmph_uint32 hashes[3];
|
||||||
|
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
|
||||||
|
return hashes[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \fn void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
|
{
|
||||||
|
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
|
||||||
|
}
|
||||||
|
@ -19,17 +19,47 @@ jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table
|
|||||||
*/
|
*/
|
||||||
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
|
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
|
||||||
|
|
||||||
/** \fn void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
/** \fn void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
* \param state is a pointer to a jenkins_state_t structure
|
* \param state is a pointer to a jenkins_state_t structure
|
||||||
* \param key is a pointer to a key
|
* \param key is a pointer to a key
|
||||||
* \param keylen is the key length
|
* \param keylen is the key length
|
||||||
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
*/
|
*/
|
||||||
void jenkins_hash_vector(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
|
||||||
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen);
|
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen);
|
||||||
jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state);
|
jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state);
|
||||||
jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen);
|
jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen);
|
||||||
void jenkins_state_destroy(jenkins_state_t *state);
|
void jenkins_state_destroy(jenkins_state_t *state);
|
||||||
|
|
||||||
|
/** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||||
|
* \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
|
||||||
|
* \param state points to the jenkins function
|
||||||
|
* \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size()
|
||||||
|
*/
|
||||||
|
void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_state_packed_size();
|
||||||
|
* \brief Return the amount of space needed to pack a jenkins function.
|
||||||
|
* \return the size of the packed function or zero for failures
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_state_packed_size();
|
||||||
|
|
||||||
|
|
||||||
|
/** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \return an integer that represents a hash value of 32 bits.
|
||||||
|
*/
|
||||||
|
cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
|
||||||
|
|
||||||
|
/** \fn void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
* \param jenkins_packed is a pointer to a contiguous memory area
|
||||||
|
* \param k is a pointer to a key
|
||||||
|
* \param keylen is the key length
|
||||||
|
* \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
|
||||||
|
*/
|
||||||
|
void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#define VERSION "0.2"
|
#define VERSION "0.8"
|
||||||
#else
|
#else
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
@ -305,6 +305,7 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
source->dispose(source->data, buf, buflen);
|
source->dispose(source->data, buf, buflen);
|
||||||
}
|
}
|
||||||
|
|
||||||
cmph_destroy(mphf);
|
cmph_destroy(mphf);
|
||||||
free(hashtable);
|
free(hashtable);
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,15 @@
|
|||||||
noinst_PROGRAMS = graph_tests
|
noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests mphf_fingerprint_tests
|
||||||
|
|
||||||
|
INCLUDES = -I../src/
|
||||||
|
|
||||||
graph_tests_SOURCES = graph_tests.c
|
graph_tests_SOURCES = graph_tests.c
|
||||||
graph_tests_LDADD = ../src/libcmph.la
|
graph_tests_LDADD = ../src/libcmph.la
|
||||||
|
|
||||||
|
packed_mphf_tests_SOURCES = packed_mphf_tests.c
|
||||||
|
packed_mphf_tests_LDADD = ../src/libcmph.la
|
||||||
|
|
||||||
|
mphf_tests_SOURCES = mphf_tests.c
|
||||||
|
mphf_tests_LDADD = ../src/libcmph.la
|
||||||
|
|
||||||
|
mphf_fingerprint_tests_SOURCES = mphf_fingerprint_tests.c
|
||||||
|
mphf_fingerprint_tests_LDADD = ../src/libcmph.la
|
||||||
|
162
tests/mphf_fingerprint_tests.c
Normal file
162
tests/mphf_fingerprint_tests.c
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
#ifdef WIN32
|
||||||
|
#include "../wingetopt.h"
|
||||||
|
#else
|
||||||
|
#include <getopt.h>
|
||||||
|
#endif
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <cmph.h>
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
#define VERSION "0.8"
|
||||||
|
#else
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void usage(const char *prg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);
|
||||||
|
}
|
||||||
|
void usage_long(const char *prg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);
|
||||||
|
fprintf(stderr, "Packed MPHFs testing tool\n\n");
|
||||||
|
fprintf(stderr, " -h\t print this help message\n");
|
||||||
|
fprintf(stderr, " -V\t print version number and exit\n");
|
||||||
|
fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
|
||||||
|
fprintf(stderr, " -k\t number of keys\n");
|
||||||
|
fprintf(stderr, " -m\t minimum perfect hash function file \n");
|
||||||
|
fprintf(stderr, " keysfile\t line separated file with keys\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
char verbosity = 0;
|
||||||
|
char *mphf_file = NULL;
|
||||||
|
const char *keys_file = NULL;
|
||||||
|
FILE *mphf_fd = stdout;
|
||||||
|
FILE *keys_fd;
|
||||||
|
cmph_uint32 nkeys = UINT_MAX;
|
||||||
|
cmph_uint32 i = 0;
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
cmph_io_adapter_t *source;
|
||||||
|
cmph_uint32 fingerprint[3];
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
char ch = getopt(argc, argv, "hVvk:m:");
|
||||||
|
if (ch == -1) break;
|
||||||
|
switch (ch)
|
||||||
|
{
|
||||||
|
case 'k':
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
nkeys = strtoul(optarg, &endptr, 10);
|
||||||
|
if(*endptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid number of keys %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
mphf_file = strdup(optarg);
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
++verbosity;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
|
printf("%s\n", VERSION);
|
||||||
|
return 0;
|
||||||
|
case 'h':
|
||||||
|
usage_long(argv[0]);
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optind != argc - 1)
|
||||||
|
{
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
keys_file = argv[optind];
|
||||||
|
|
||||||
|
int ret = 0;
|
||||||
|
if (mphf_file == NULL)
|
||||||
|
{
|
||||||
|
mphf_file = (char *)malloc(strlen(keys_file) + 5);
|
||||||
|
memcpy(mphf_file, keys_file, strlen(keys_file));
|
||||||
|
memcpy(mphf_file + strlen(keys_file), ".mph\0", 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
keys_fd = fopen(keys_file, "r");
|
||||||
|
|
||||||
|
if (keys_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
|
||||||
|
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
|
||||||
|
|
||||||
|
cmph_uint8 * hashtable = NULL;
|
||||||
|
mphf_fd = fopen(mphf_file, "r");
|
||||||
|
if (mphf_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
mphf = cmph_load(mphf_fd);
|
||||||
|
fclose(mphf_fd);
|
||||||
|
if (!mphf)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cmph_uint32 siz = cmph_size(mphf);
|
||||||
|
hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8));
|
||||||
|
memset(hashtable, 0, siz);
|
||||||
|
//check all keys
|
||||||
|
for (i = 0; i < source->nkeys; ++i)
|
||||||
|
{
|
||||||
|
cmph_uint32 h;
|
||||||
|
char *buf;
|
||||||
|
cmph_uint32 buflen = 0;
|
||||||
|
source->read(source->data, &buf, &buflen);
|
||||||
|
h = cmph_search_fingerprint(mphf, buf, buflen, fingerprint);
|
||||||
|
if (!(h < siz))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else if(hashtable[h])
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else hashtable[h] = 1;
|
||||||
|
|
||||||
|
if (verbosity)
|
||||||
|
{
|
||||||
|
printf("%s -> %u -- fingerprint: %u %u %u\n", buf, h, fingerprint[0], fingerprint[1], fingerprint[2]);
|
||||||
|
}
|
||||||
|
source->dispose(source->data, buf, buflen);
|
||||||
|
}
|
||||||
|
|
||||||
|
cmph_destroy(mphf);
|
||||||
|
free(hashtable);
|
||||||
|
|
||||||
|
fclose(keys_fd);
|
||||||
|
free(mphf_file);
|
||||||
|
cmph_io_nlfile_adapter_destroy(source);
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
161
tests/mphf_tests.c
Normal file
161
tests/mphf_tests.c
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
#ifdef WIN32
|
||||||
|
#include "../wingetopt.h"
|
||||||
|
#else
|
||||||
|
#include <getopt.h>
|
||||||
|
#endif
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <cmph.h>
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
#define VERSION "0.8"
|
||||||
|
#else
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void usage(const char *prg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);
|
||||||
|
}
|
||||||
|
void usage_long(const char *prg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);
|
||||||
|
fprintf(stderr, "Packed MPHFs testing tool\n\n");
|
||||||
|
fprintf(stderr, " -h\t print this help message\n");
|
||||||
|
fprintf(stderr, " -V\t print version number and exit\n");
|
||||||
|
fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n");
|
||||||
|
fprintf(stderr, " -k\t number of keys\n");
|
||||||
|
fprintf(stderr, " -m\t minimum perfect hash function file \n");
|
||||||
|
fprintf(stderr, " keysfile\t line separated file with keys\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
char verbosity = 0;
|
||||||
|
char *mphf_file = NULL;
|
||||||
|
const char *keys_file = NULL;
|
||||||
|
FILE *mphf_fd = stdout;
|
||||||
|
FILE *keys_fd;
|
||||||
|
cmph_uint32 nkeys = UINT_MAX;
|
||||||
|
cmph_uint32 i = 0;
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
cmph_io_adapter_t *source;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
char ch = getopt(argc, argv, "hVvk:m:");
|
||||||
|
if (ch == -1) break;
|
||||||
|
switch (ch)
|
||||||
|
{
|
||||||
|
case 'k':
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
nkeys = strtoul(optarg, &endptr, 10);
|
||||||
|
if(*endptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid number of keys %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
mphf_file = strdup(optarg);
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
++verbosity;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
|
printf("%s\n", VERSION);
|
||||||
|
return 0;
|
||||||
|
case 'h':
|
||||||
|
usage_long(argv[0]);
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optind != argc - 1)
|
||||||
|
{
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
keys_file = argv[optind];
|
||||||
|
|
||||||
|
int ret = 0;
|
||||||
|
if (mphf_file == NULL)
|
||||||
|
{
|
||||||
|
mphf_file = (char *)malloc(strlen(keys_file) + 5);
|
||||||
|
memcpy(mphf_file, keys_file, strlen(keys_file));
|
||||||
|
memcpy(mphf_file + strlen(keys_file), ".mph\0", 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
keys_fd = fopen(keys_file, "r");
|
||||||
|
|
||||||
|
if (keys_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
|
||||||
|
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
|
||||||
|
|
||||||
|
cmph_uint8 * hashtable = NULL;
|
||||||
|
mphf_fd = fopen(mphf_file, "r");
|
||||||
|
if (mphf_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
mphf = cmph_load(mphf_fd);
|
||||||
|
fclose(mphf_fd);
|
||||||
|
if (!mphf)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cmph_uint32 siz = cmph_size(mphf);
|
||||||
|
hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8));
|
||||||
|
memset(hashtable, 0, siz);
|
||||||
|
//check all keys
|
||||||
|
for (i = 0; i < source->nkeys; ++i)
|
||||||
|
{
|
||||||
|
cmph_uint32 h;
|
||||||
|
char *buf;
|
||||||
|
cmph_uint32 buflen = 0;
|
||||||
|
source->read(source->data, &buf, &buflen);
|
||||||
|
h = cmph_search(mphf, buf, buflen);
|
||||||
|
if (!(h < siz))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else if(hashtable[h])
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else hashtable[h] = 1;
|
||||||
|
|
||||||
|
if (verbosity)
|
||||||
|
{
|
||||||
|
printf("%s -> %u\n", buf, h);
|
||||||
|
}
|
||||||
|
source->dispose(source->data, buf, buflen);
|
||||||
|
}
|
||||||
|
|
||||||
|
cmph_destroy(mphf);
|
||||||
|
free(hashtable);
|
||||||
|
|
||||||
|
fclose(keys_fd);
|
||||||
|
free(mphf_file);
|
||||||
|
cmph_io_nlfile_adapter_destroy(source);
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
177
tests/packed_mphf_tests.c
Normal file
177
tests/packed_mphf_tests.c
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
#ifdef WIN32
|
||||||
|
#include "../wingetopt.h"
|
||||||
|
#else
|
||||||
|
#include <getopt.h>
|
||||||
|
#endif
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <cmph.h>
|
||||||
|
//#include "hash.h"
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
#define VERSION "0.8"
|
||||||
|
#else
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void usage(const char *prg)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);
|
||||||
|
}
|
||||||
|
/* Print the full help text to stderr: the synopsis line followed by a
 * short description of every command-line option.
 * prg is the program name to show (callers pass argv[0]). */
void usage_long(const char *prg)
{
	FILE *out = stderr;

	/* Only the synopsis needs formatting; the rest is fixed text. */
	fprintf(out, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);
	fputs("Packed MPHFs testing tool\n\n", out);
	fputs(" -h\t print this help message\n", out);
	fputs(" -V\t print version number and exit\n", out);
	fputs(" -v\t increase verbosity (may be used multiple times)\n", out);
	fputs(" -k\t number of keys\n", out);
	fputs(" -m\t minimum perfect hash function file \n", out);
	fputs(" keysfile\t line separated file with keys\n", out);
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
char verbosity = 0;
|
||||||
|
char *mphf_file = NULL;
|
||||||
|
const char *keys_file = NULL;
|
||||||
|
FILE *mphf_fd = stdout;
|
||||||
|
FILE *keys_fd;
|
||||||
|
cmph_uint32 nkeys = UINT_MAX;
|
||||||
|
cmph_uint32 i = 0;
|
||||||
|
cmph_t *mphf = NULL;
|
||||||
|
cmph_io_adapter_t *source;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
char ch = getopt(argc, argv, "hVvk:m:");
|
||||||
|
if (ch == -1) break;
|
||||||
|
switch (ch)
|
||||||
|
{
|
||||||
|
case 'k':
|
||||||
|
{
|
||||||
|
char *endptr;
|
||||||
|
nkeys = strtoul(optarg, &endptr, 10);
|
||||||
|
if(*endptr != 0) {
|
||||||
|
fprintf(stderr, "Invalid number of keys %s\n", optarg);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
mphf_file = strdup(optarg);
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
++verbosity;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
|
printf("%s\n", VERSION);
|
||||||
|
return 0;
|
||||||
|
case 'h':
|
||||||
|
usage_long(argv[0]);
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optind != argc - 1)
|
||||||
|
{
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
keys_file = argv[optind];
|
||||||
|
|
||||||
|
int ret = 0;
|
||||||
|
if (mphf_file == NULL)
|
||||||
|
{
|
||||||
|
mphf_file = (char *)malloc(strlen(keys_file) + 5);
|
||||||
|
memcpy(mphf_file, keys_file, strlen(keys_file));
|
||||||
|
memcpy(mphf_file + strlen(keys_file), ".mph\0", 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
keys_fd = fopen(keys_file, "r");
|
||||||
|
|
||||||
|
if (keys_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
|
||||||
|
else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
|
||||||
|
|
||||||
|
cmph_uint8 * hashtable = NULL;
|
||||||
|
mphf_fd = fopen(mphf_file, "r");
|
||||||
|
if (mphf_fd == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
mphf = cmph_load(mphf_fd);
|
||||||
|
fclose(mphf_fd);
|
||||||
|
if (!mphf)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
|
||||||
|
free(mphf_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cmph_uint32 siz = cmph_size(mphf);
|
||||||
|
hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8));
|
||||||
|
memset(hashtable, 0, siz);
|
||||||
|
|
||||||
|
// packing the function
|
||||||
|
/* Determine how much space is needed to pack the mphf. */
|
||||||
|
cmph_uint32 packed_size = cmph_packed_size(mphf);
|
||||||
|
fprintf(stderr, "packed_size = %u\n", packed_size);
|
||||||
|
|
||||||
|
/* Make sure that we have enough space to pack the mphf. */
|
||||||
|
cmph_uint8 * packed_mphf = calloc(packed_size,1);
|
||||||
|
|
||||||
|
/* Pack the mphf. */
|
||||||
|
cmph_pack(mphf, packed_mphf);
|
||||||
|
|
||||||
|
// testing the packed function
|
||||||
|
//check all keys
|
||||||
|
for (i = 0; i < source->nkeys; ++i)
|
||||||
|
{
|
||||||
|
cmph_uint32 h;
|
||||||
|
char *buf;
|
||||||
|
cmph_uint32 buflen = 0;
|
||||||
|
source->read(source->data, &buf, &buflen);
|
||||||
|
h = cmph_search_packed(packed_mphf, buf, buflen);
|
||||||
|
|
||||||
|
if (!(h < siz))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else if(hashtable[h])
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
|
||||||
|
ret = 1;
|
||||||
|
} else hashtable[h] = 1;
|
||||||
|
|
||||||
|
if (verbosity)
|
||||||
|
{
|
||||||
|
printf("%s -> %u\n", buf, h);
|
||||||
|
}
|
||||||
|
source->dispose(source->data, buf, buflen);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(packed_mphf);
|
||||||
|
cmph_destroy(mphf);
|
||||||
|
free(hashtable);
|
||||||
|
|
||||||
|
fclose(keys_fd);
|
||||||
|
free(mphf_file);
|
||||||
|
cmph_io_nlfile_adapter_destroy(source);
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user