From b5cc0a8ea845e2ab24c86625d70a0c7d651000bf Mon Sep 17 00:00:00 2001 From: davi Date: Mon, 28 Apr 2008 01:18:23 +0000 Subject: [PATCH] Removed fingerprint methods and fixed pending bugs. --- src/bdz.c | 27 ----- src/bdz.h | 14 --- src/bdz_ph.c | 38 ------- src/bdz_ph.h | 14 --- src/bmz.c | 31 +----- src/bmz.h | 14 --- src/bmz8.c | 29 ------ src/bmz8.h | 14 --- src/brz.c | 28 +----- src/brz.h | 14 --- src/chm.c | 34 +------ src/chm.h | 14 --- src/cmph.c | 48 --------- src/cmph.h | 16 --- src/fch.c | 27 ----- src/fch.h | 14 --- src/main.c | 2 +- tests/Makefile.am | 5 +- tests/mphf_fingerprint_tests.c | 162 ----------------------------- wingetopt.c | 179 --------------------------------- wingetopt.h | 25 ----- 21 files changed, 6 insertions(+), 743 deletions(-) delete mode 100644 tests/mphf_fingerprint_tests.c delete mode 100644 wingetopt.c delete mode 100644 wingetopt.h diff --git a/src/bdz.c b/src/bdz.c index 280601d..280731e 100755 --- a/src/bdz.c +++ b/src/bdz.c @@ -599,33 +599,6 @@ void bdz_destroy(cmph_t *mphf) free(mphf); } -/** cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - register cmph_uint32 vertex; - register bdz_data_t *bdz = mphf->data; - cmph_uint32 hl[3]; - - hash_vector(bdz->hl, key, keylen, hl); - memcpy(fingerprint, hl, sizeof(hl)); - hl[0] = hl[0] % bdz->r; - hl[1] = hl[1] % bdz->r + bdz->r; - hl[2] = hl[2] % bdz->r + (bdz->r << 1); - vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3]; - return rank(bdz->b, bdz->ranktable, bdz->g, vertex); -} - /** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bdz.h b/src/bdz.h index b3ad18a..f2b7b89 100755 --- a/src/bdz.h +++ b/src/bdz.h @@ -17,20 +17,6 @@ int bdz_dump(cmph_t *mphf, FILE *f); void bdz_destroy(cmph_t *mphf); cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 bdz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bdz_ph.c b/src/bdz_ph.c index 5ec6718..cf1f76b 100755 --- a/src/bdz_ph.c +++ b/src/bdz_ph.c @@ -527,44 +527,6 @@ void bdz_ph_destroy(cmph_t *mphf) free(mphf); } -/** cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - register bdz_ph_data_t *bdz_ph = mphf->data; - cmph_uint32 hl[3]; - register cmph_uint8 byte0, byte1, byte2; - register cmph_uint32 vertex; - - hash_vector(bdz_ph->hl, key, keylen,hl); - memcpy(fingerprint, hl, sizeof(hl)); - - hl[0] = hl[0] % bdz_ph->r; - hl[1] = hl[1] % bdz_ph->r + bdz_ph->r; - hl[2] = hl[2] % bdz_ph->r + (bdz_ph->r << 1); - - byte0 = bdz_ph->g[hl[0]/5]; - byte1 = bdz_ph->g[hl[1]/5]; - byte2 = bdz_ph->g[hl[2]/5]; - - byte0 = lookup_table[hl[0]%5][byte0]; - byte1 = lookup_table[hl[1]%5][byte1]; - byte2 = lookup_table[hl[2]%5][byte2]; - vertex = hl[(byte0 + byte1 + byte2)%3]; - - return vertex; -} - /** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bdz_ph.h b/src/bdz_ph.h index 7ddd4b8..73cce2e 100755 --- a/src/bdz_ph.h +++ b/src/bdz_ph.h @@ -16,20 +16,6 @@ int bdz_ph_dump(cmph_t *mphf, FILE *f); void bdz_ph_destroy(cmph_t *mphf); cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 bdz_ph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bmz.c b/src/bmz.c index 1312f78..71f9fd6 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -22,7 +22,7 @@ static void bmz_traverse_non_critical_nodes(bmz_config_data_t *bmz, cmph_uint8 * bmz_config_data_t *bmz_config_new() { - bmz_config_data_t *bmz; + bmz_config_data_t *bmz = NULL; bmz = (bmz_config_data_t *)malloc(sizeof(bmz_config_data_t)); assert(bmz); memset(bmz, 0, sizeof(bmz_config_data_t)); @@ -539,35 +539,6 @@ void bmz_destroy(cmph_t *mphf) free(mphf); } -/** cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - bmz_data_t *bmz = mphf->data; - cmph_uint32 h1, h2; - - hash_vector(bmz->hashes[0], key, keylen, fingerprint); - h1 = fingerprint[2] % bmz->n; - - hash_vector(bmz->hashes[1], key, keylen, fingerprint); - h2 = fingerprint[2] % bmz->n; - - DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); - if (h1 == h2 && ++h2 > bmz->n) h2 = 0; - DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz->g[h1], bmz->g[h2], bmz->m); - return bmz->g[h1] + bmz->g[h2]; -} - /** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bmz.h b/src/bmz.h index 36e27df..ee5f61d 100644 --- a/src/bmz.h +++ b/src/bmz.h @@ -16,20 +16,6 @@ int bmz_dump(cmph_t *mphf, FILE *f); void bmz_destroy(cmph_t *mphf); cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 bmz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bmz8.c b/src/bmz8.c index 46ae514..efc816c 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -548,35 +548,6 @@ void bmz8_destroy(cmph_t *mphf) free(mphf); } -/** cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - bmz8_data_t *bmz8 = mphf->data; - cmph_uint8 h1, h2; - - hash_vector(bmz8->hashes[0], key, keylen, fingerprint); - h1 = fingerprint[2] % bmz8->n; - - hash_vector(bmz8->hashes[1], key, keylen, fingerprint); - h2 = fingerprint[2] % bmz8->n; - - DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); - if (h1 == h2 && ++h2 > bmz8->n) h2 = 0; - DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, bmz8->g[h1], bmz8->g[h2], bmz8->m); - return bmz8->g[h1] + bmz8->g[h2]; -} - /** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/bmz8.h b/src/bmz8.h index b4d4a38..5456759 100644 --- a/src/bmz8.h +++ b/src/bmz8.h @@ -16,20 +16,6 @@ int bmz8_dump(cmph_t *mphf, FILE *f); void bmz8_destroy(cmph_t *mphf); cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint8 bmz8_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/brz.c b/src/brz.c index 213091d..4686114 100755 --- a/src/brz.c +++ b/src/brz.c @@ -712,32 +712,6 @@ void brz_destroy(cmph_t *mphf) free(mphf); } -/** cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - brz_data_t *brz = mphf->data; - switch(brz->algo) - { - case CMPH_FCH: - return brz_fch_search(brz, key, keylen, fingerprint); - case CMPH_BMZ8: - return brz_bmz8_search(brz, key, keylen, fingerprint); - default: assert(0); - } - return 0; -} - /** \fn void brz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf @@ -791,7 +765,7 @@ void brz_pack(cmph_t *mphf, void *packed_mphf) #if defined (__ia64) || defined (__x86_64__) cmph_uint64 * g_is_ptr = (cmph_uint64 *)ptr; #else - cmph_uint32 * g_is_ptr = ptr; + cmph_uint32 * g_is_ptr = (cmph_uint32 *)ptr; #endif cmph_uint8 * g_i = (cmph_uint8 *) (g_is_ptr + data->k); diff --git a/src/brz.h b/src/brz.h index a7c7293..ac07ed7 100644 --- a/src/brz.h +++ b/src/brz.h @@ -21,20 +21,6 @@ int brz_dump(cmph_t *mphf, FILE *f); void brz_destroy(cmph_t *mphf); cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 brz_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void brz_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/chm.c b/src/chm.c index 0fdce44..daf0ae7 100644 --- a/src/chm.c +++ b/src/chm.c @@ -19,10 +19,10 @@ static void chm_traverse(chm_config_data_t *chm, cmph_uint8 *visited, cmph_uint3 chm_config_data_t *chm_config_new() { - chm_config_data_t *chm; + chm_config_data_t *chm = NULL; chm = (chm_config_data_t *)malloc(sizeof(chm_config_data_t)); assert(chm); - memset(chm,0,sizeof(chm_config_data_t)); + memset(chm, 0, sizeof(chm_config_data_t)); chm->hashfuncs[0] = CMPH_HASH_JENKINS; chm->hashfuncs[1] = CMPH_HASH_JENKINS; chm->g = NULL; @@ -293,36 +293,6 @@ void chm_destroy(cmph_t *mphf) free(mphf); } - -/** cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - chm_data_t *chm = mphf->data; - cmph_uint32 h1, h2; - - hash_vector(chm->hashes[0], key, keylen, fingerprint); - h1 = fingerprint[2] % chm->n; - - hash_vector(chm->hashes[1], key, keylen, fingerprint); - h2 = fingerprint[2] % chm->n; - - DEBUGP("key: %s h1: %u h2: %u\n", key, h1, h2); - if (h1 == h2 && ++h2 >= chm->n) h2 = 0; - DEBUGP("key: %s g[h1]: %u g[h2]: %u edges: %u\n", key, chm->g[h1], chm->g[h2], chm->m); - return (chm->g[h1] + chm->g[h2]) % chm->m; -} - /** \fn void chm_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/chm.h b/src/chm.h index 77bbc60..341be29 100644 --- a/src/chm.h +++ b/src/chm.h @@ -16,20 +16,6 @@ int chm_dump(cmph_t *mphf, FILE *f); void chm_destroy(cmph_t *mphf); cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 chm_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void chm_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/cmph.c b/src/cmph.c index a22304e..77b3262 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -612,54 +612,6 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) return 0; } - - -/** cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - DEBUGP("mphf algorithm: %u \n", mphf->algo); - switch(mphf->algo) - { - case CMPH_CHM: - return chm_search_fingerprint(mphf, key, keylen, fingerprint); - case CMPH_BMZ: /* included -- Fabiano */ - DEBUGP("bmz algorithm search\n"); - return bmz_search_fingerprint(mphf, key, keylen, fingerprint); - case CMPH_BMZ8: /* included -- Fabiano */ - DEBUGP("bmz8 algorithm search\n"); - return bmz8_search_fingerprint(mphf, key, keylen, fingerprint); - case CMPH_BRZ: /* included -- Fabiano */ - DEBUGP("brz algorithm search\n"); - return brz_search_fingerprint(mphf, key, keylen, fingerprint); - case CMPH_FCH: /* included -- Fabiano */ - DEBUGP("fch algorithm search\n"); - return fch_search_fingerprint(mphf, key, keylen, fingerprint); - case CMPH_BDZ: /* included -- Fabiano */ - DEBUGP("bdz algorithm search\n"); - return bdz_search_fingerprint(mphf, key, keylen, fingerprint); - case CMPH_BDZ_PH: /* included -- Fabiano */ - DEBUGP("bdz_ph algorithm search\n"); - return bdz_ph_search_fingerprint(mphf, key, keylen, fingerprint); - default: - assert(0); - } - assert(0); - return 0; -} - - - cmph_uint32 cmph_size(cmph_t *mphf) { return mphf->size; diff --git a/src/cmph.h b/src/cmph.h index d719f4e..758f103 100644 --- a/src/cmph.h +++ b/src/cmph.h @@ -69,22 +69,6 @@ cmph_t *cmph_new(cmph_config_t *mph); */ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., - * \brief figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 cmph_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - - cmph_uint32 cmph_size(cmph_t *mphf); void cmph_destroy(cmph_t *mphf); diff --git a/src/fch.c b/src/fch.c index 33bca10..aa47770 100644 --- a/src/fch.c +++ b/src/fch.c @@ -411,33 +411,6 @@ void fch_destroy(cmph_t *mphf) free(mphf); } -/** cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint) -{ - register fch_data_t *fch = mphf->data; - - hash_vector(fch->h1, key, keylen, fingerprint); - register cmph_uint32 h1 = fingerprint[2] % fch->m; - - hash_vector(fch->h2, key, keylen, fingerprint); - register cmph_uint32 h2 = fingerprint[2] % fch->m; - - h1 = mixh10h11h12 (fch->b, fch->p1, fch->p2, h1); - //DEBUGP("key: %s h1: %u h2: %u g[h1]: %u\n", key, h1, h2, fch->g[h1]); - return (h2 + fch->g[h1]) % fch->m; -} - /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/fch.h b/src/fch.h index 51395f1..ec4f0f5 100644 --- a/src/fch.h +++ b/src/fch.h @@ -22,20 +22,6 @@ int fch_dump(cmph_t *mphf, FILE *f); void fch_destroy(cmph_t *mphf); cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); -/** cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - * \brief Computes the mphf value and a fingerprint of 12 bytes (i.e., figerprint should be a prealocated area to fit three 4-byte integers). - * \param mphf pointer to the resulting function - * \param key is the key to be hashed - * \param keylen is the key legth in bytes - * \return The mphf value - * - * Computes the mphf value and a fingerprint of 12 bytes. The figerprint pointer should be - * a prealocated area to fit three 4-byte integers. You don't need to use all the 12 bytes - * as fingerprint. According to the application, just few bits can be enough, once mphf does - * not allow collisions for the keys previously known. - */ -cmph_uint32 fch_search_fingerprint(cmph_t *mphf, const char *key, cmph_uint32 keylen, cmph_uint32 * fingerprint); - /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf); * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. * \param mphf pointer to the resulting mphf diff --git a/src/main.c b/src/main.c index 5a6dd1b..7ce3186 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,5 @@ #ifdef WIN32 -#include "../wingetopt.h" +#include "wingetopt.h" #else #include #endif diff --git a/tests/Makefile.am b/tests/Makefile.am index 5242d4e..d0235d2 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,4 +1,4 @@ -noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests mphf_fingerprint_tests +noinst_PROGRAMS = graph_tests packed_mphf_tests mphf_tests INCLUDES = -I../src/ @@ -10,6 +10,3 @@ packed_mphf_tests_LDADD = ../src/libcmph.la mphf_tests_SOURCES = mphf_tests.c mphf_tests_LDADD = ../src/libcmph.la - -mphf_fingerprint_tests_SOURCES = mphf_fingerprint_tests.c -mphf_fingerprint_tests_LDADD = ../src/libcmph.la diff --git a/tests/mphf_fingerprint_tests.c b/tests/mphf_fingerprint_tests.c deleted file mode 100644 index f5519c5..0000000 --- a/tests/mphf_fingerprint_tests.c +++ /dev/null @@ -1,162 +0,0 @@ -#ifdef WIN32 -#include "../wingetopt.h" -#else -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef WIN32 -#define VERSION "0.8" -#else -#include "config.h" -#endif - - -void usage(const char *prg) -{ - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg); -} -void usage_long(const char *prg) -{ - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg); - fprintf(stderr, "Packed MPHFs testing tool\n\n"); - fprintf(stderr, " -h\t print this help message\n"); - fprintf(stderr, " -V\t print version number and exit\n"); - fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n"); - fprintf(stderr, " -k\t number of keys\n"); - fprintf(stderr, " -m\t minimum perfect hash function file \n"); - fprintf(stderr, " keysfile\t line separated file with keys\n"); -} - -int main(int argc, char **argv) -{ - char verbosity = 0; - char *mphf_file = NULL; - const char *keys_file = NULL; - FILE *mphf_fd = stdout; - FILE *keys_fd; - cmph_uint32 nkeys = UINT_MAX; - cmph_uint32 i = 0; - cmph_t *mphf = NULL; - cmph_io_adapter_t *source; - cmph_uint32 fingerprint[3]; - while (1) - { - char ch = getopt(argc, argv, "hVvk:m:"); - if (ch == -1) break; - switch (ch) - { - case 'k': - { - char *endptr; - nkeys = strtoul(optarg, &endptr, 10); - if(*endptr != 0) { - fprintf(stderr, "Invalid number of keys %s\n", optarg); - exit(1); - } - } - break; - case 'm': - mphf_file = strdup(optarg); - break; - case 'v': - ++verbosity; - break; - case 'V': - printf("%s\n", VERSION); - return 0; - case 'h': - usage_long(argv[0]); - return 0; - default: - usage(argv[0]); - return 1; - } - } - - if (optind != argc - 1) - { - usage(argv[0]); - return 1; - } - keys_file = argv[optind]; - - int ret = 0; - if (mphf_file == NULL) - { - mphf_file = (char *)malloc(strlen(keys_file) + 5); - memcpy(mphf_file, keys_file, strlen(keys_file)); - memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5); - } - - keys_fd = fopen(keys_file, "r"); - - if (keys_fd == NULL) - { - fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno)); - return -1; - } - - if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd); - else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys); - - cmph_uint8 * hashtable = NULL; - mphf_fd = fopen(mphf_file, "r"); - if (mphf_fd == NULL) - { - fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno)); - free(mphf_file); - return -1; - } - mphf = cmph_load(mphf_fd); - fclose(mphf_fd); - if (!mphf) - { - fprintf(stderr, "Unable to parser input file %s\n", mphf_file); - free(mphf_file); - return -1; - } - cmph_uint32 siz = cmph_size(mphf); - hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8)); - memset(hashtable, 0, (size_t)siz); - //check all keys - for (i = 0; i < source->nkeys; ++i) - { - cmph_uint32 h; - char *buf; - cmph_uint32 buflen = 0; - source->read(source->data, &buf, &buflen); - h = cmph_search_fingerprint(mphf, buf, buflen, fingerprint); - if (!(h < siz)) - { - fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf); - ret = 1; - } else if(hashtable[h]) - { - fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf); - ret = 1; - } else hashtable[h] = 1; - - if (verbosity) - { - printf("%s -> %u -- fingerprint: %u %u %u\n", buf, h, fingerprint[0], fingerprint[1], fingerprint[2]); - } - source->dispose(source->data, buf, buflen); - } - - cmph_destroy(mphf); - free(hashtable); - - fclose(keys_fd); - free(mphf_file); - cmph_io_nlfile_adapter_destroy(source); - return ret; - -} diff --git a/wingetopt.c b/wingetopt.c deleted file mode 100644 index c981d0f..0000000 --- a/wingetopt.c +++ /dev/null @@ -1,179 +0,0 @@ -#ifdef WIN32 -/***************************************************************************** - * - * MODULE NAME : GETOPT.C - * - * COPYRIGHTS: - * This module contains code made available by IBM - * Corporation on an AS IS basis. Any one receiving the - * module is considered to be licensed under IBM copyrights - * to use the IBM-provided source code in any way he or she - * deems fit, including copying it, compiling it, modifying - * it, and redistributing it, with or without - * modifications. No license under any IBM patents or - * patent applications is to be implied from this copyright - * license. - * - * A user of the module should understand that IBM cannot - * provide technical support for the module and will not be - * responsible for any consequences of use of the program. - * - * Any notices, including this one, are not to be removed - * from the module without the prior written consent of - * IBM. - * - * AUTHOR: Original author: - * G. R. Blair (BOBBLAIR at AUSVM1) - * Internet: bobblair@bobblair.austin.ibm.com - * - * Extensively revised by: - * John Q. Walker II, Ph.D. (JOHHQ at RALVM6) - * Internet: johnq@ralvm6.vnet.ibm.com - * - *****************************************************************************/ - -/****************************************************************************** - * getopt() - * - * The getopt() function is a command line parser. It returns the next - * option character in argv that matches an option character in opstring. - * - * The argv argument points to an array of argc+1 elements containing argc - * pointers to character strings followed by a null pointer. - * - * The opstring argument points to a string of option characters; if an - * option character is followed by a colon, the option is expected to have - * an argument that may or may not be separated from it by white space. - * The external variable optarg is set to point to the start of the option - * argument on return from getopt(). - * - * The getopt() function places in optind the argv index of the next argument - * to be processed. The system initializes the external variable optind to - * 1 before the first call to getopt(). - * - * When all options have been processed (that is, up to the first nonoption - * argument), getopt() returns EOF. The special option "--" may be used to - * delimit the end of the options; EOF will be returned, and "--" will be - * skipped. - * - * The getopt() function returns a question mark (?) when it encounters an - * option character not included in opstring. This error message can be - * disabled by setting opterr to zero. Otherwise, it returns the option - * character that was detected. - * - * If the special option "--" is detected, or all options have been - * processed, EOF is returned. - * - * Options are marked by either a minus sign (-) or a slash (/). - * - * No errors are defined. - *****************************************************************************/ - -#include /* for EOF */ -#include /* for strchr() */ - -/* static (global) variables that are specified as exported by getopt() */ -extern char *optarg; /* pointer to the start of the option argument */ -extern int optind; /* number of the next argv[] to be evaluated */ -extern int opterr; /* non-zero if a question mark should be returned - when a non-valid option character is detected */ - -/* handle possible future character set concerns by putting this in a macro */ -#define _next_char(string) (char)(*(string+1)) - -int getopt(int argc, char *argv[], char *opstring) -{ - static char *pIndexPosition = NULL; /* place inside current argv string */ - char *pArgString = NULL; /* where to start from next */ - char *pOptString; /* the string in our program */ - - - if (pIndexPosition != NULL) { - /* we last left off inside an argv string */ - if (*(++pIndexPosition)) { - /* there is more to come in the most recent argv */ - pArgString = pIndexPosition; - } - } - - if (pArgString == NULL) { - /* we didn't leave off in the middle of an argv string */ - if (optind >= argc) { - /* more command-line arguments than the argument count */ - pIndexPosition = NULL; /* not in the middle of anything */ - return EOF; /* used up all command-line arguments */ - } - - /*--------------------------------------------------------------------- - * If the next argv[] is not an option, there can be no more options. - *-------------------------------------------------------------------*/ - pArgString = argv[optind++]; /* set this to the next argument ptr */ - - if (('/' != *pArgString) && /* doesn't start with a slash or a dash? */ - ('-' != *pArgString)) { - --optind; /* point to current arg once we're done */ - optarg = NULL; /* no argument follows the option */ - pIndexPosition = NULL; /* not in the middle of anything */ - return EOF; /* used up all the command-line flags */ - } - - /* check for special end-of-flags markers */ - if ((strcmp(pArgString, "-") == 0) || - (strcmp(pArgString, "--") == 0)) { - optarg = NULL; /* no argument follows the option */ - pIndexPosition = NULL; /* not in the middle of anything */ - return EOF; /* encountered the special flag */ - } - - pArgString++; /* look past the / or - */ - } - - if (':' == *pArgString) { /* is it a colon? */ - /*--------------------------------------------------------------------- - * Rare case: if opterr is non-zero, return a question mark; - * otherwise, just return the colon we're on. - *-------------------------------------------------------------------*/ - return (opterr ? (int)'?' : (int)':'); - } - else if ((pOptString = strchr(opstring, *pArgString)) == 0) { - /*--------------------------------------------------------------------- - * The letter on the command-line wasn't any good. - *-------------------------------------------------------------------*/ - optarg = NULL; /* no argument follows the option */ - pIndexPosition = NULL; /* not in the middle of anything */ - return (opterr ? (int)'?' : (int)*pArgString); - } - else { - /*--------------------------------------------------------------------- - * The letter on the command-line matches one we expect to see - *-------------------------------------------------------------------*/ - if (':' == _next_char(pOptString)) { /* is the next letter a colon? */ - /* It is a colon. Look for an argument string. */ - if ('\0' != _next_char(pArgString)) { /* argument in this argv? */ - optarg = &pArgString[1]; /* Yes, it is */ - } - else { - /*------------------------------------------------------------- - * The argument string must be in the next argv. - * But, what if there is none (bad input from the user)? - * In that case, return the letter, and optarg as NULL. - *-----------------------------------------------------------*/ - if (optind < argc) - optarg = argv[optind++]; - else { - optarg = NULL; - return (opterr ? (int)'?' : (int)*pArgString); - } - } - pIndexPosition = NULL; /* not in the middle of anything */ - } - else { - /* it's not a colon, so just return the letter */ - optarg = NULL; /* no argument follows the option */ - pIndexPosition = pArgString; /* point to the letter we're on */ - } - return (int)*pArgString; /* return the letter that matched */ - } -} - -#endif //WIN32 diff --git a/wingetopt.h b/wingetopt.h deleted file mode 100644 index 9596853..0000000 --- a/wingetopt.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef WIN32 - #include -#else - #ifndef _GETOPT_ - #define _GETOPT_ - - #include /* for EOF */ - #include /* for strchr() */ - - char *optarg = NULL; /* pointer to the start of the option argument */ - int optind = 1; /* number of the next argv[] to be evaluated */ - int opterr = 1; /* non-zero if a question mark should be returned */ - - int getopt(int argc, char *argv[], char *opstring); - #endif //_GETOPT_ -#endif //WIN32 - -#ifdef __cplusplus -} -#endif -