diff --git a/examples/Makefile.am b/examples/Makefile.am
index 812919f..f20e8c7 100755
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -1,4 +1,4 @@
-noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2
+noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2 struct_vector_adapter_ex3
 
 INCLUDES = -I../src/
 
@@ -8,3 +8,5 @@ vector_adapter_ex1_SOURCES = vector_adapter_ex1.c
 
 file_adapter_ex2_LDADD = ../src/libcmph.la
 file_adapter_ex2_SOURCES = file_adapter_ex2.c
+struct_vector_adapter_ex3_LDADD = ../src/libcmph.la
+struct_vector_adapter_ex3_SOURCES = struct_vector_adapter_ex3.c
diff --git a/examples/struct_vector_adapter_ex3.c b/examples/struct_vector_adapter_ex3.c
new file mode 100644
index 0000000..e3ab595
--- /dev/null
+++ b/examples/struct_vector_adapter_ex3.c
@@ -0,0 +1,76 @@
+#include <cmph.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+// Create minimal perfect hash function from an in-memory vector of structs
+
+// Pack rec_t to 1-byte alignment; push/pop restores the previous packing.
+#pragma pack(push, 1)
+typedef struct {
+	cmph_uint32 id;
+	char key[11];	// 10 characters plus the terminating NUL
+	cmph_uint32 year;
+} rec_t;
+#pragma pack(pop)
+
+int main(int argc, char **argv)
+{
+	// Creating a filled vector
+	unsigned int i = 0;
+	rec_t vector[10] = {{1, "aaaaaaaaaa", 1999}, {2, "bbbbbbbbbb", 2000}, {3, "cccccccccc", 2001},
+		{4, "dddddddddd", 2002}, {5, "eeeeeeeeee", 2003}, {6, "ffffffffff", 2004},
+		{7, "gggggggggg", 2005}, {8, "hhhhhhhhhh", 2006}, {9, "iiiiiiiiii", 2007},
+		{10,"jjjjjjjjjj", 2008}};
+	unsigned int nkeys = 10;
+	FILE* mphf_fd = fopen("temp_struct_vector.mph", "wb");
+	if (mphf_fd == NULL) {
+		perror("temp_struct_vector.mph");
+		return 1;
+	}
+	// Source of keys: the key field of every record, terminating NUL included
+	fprintf(stderr, "sizeof(rec_t) -> %zu\n", sizeof(rec_t));
+	cmph_io_adapter_t *source = cmph_io_struct_vector_adapter(vector, (cmph_uint32)sizeof(rec_t), (cmph_uint32)offsetof(rec_t, key), (cmph_uint32)sizeof(vector[0].key), nkeys);
+
+	//Create minimal perfect hash function using the BDZ algorithm.
+	cmph_config_t *config = cmph_config_new(source);
+	cmph_config_set_algo(config, CMPH_BDZ);
+	cmph_config_set_mphf_fd(config, mphf_fd);
+	cmph_t *hash = cmph_new(config);
+	cmph_config_destroy(config);
+	if (hash == NULL) {
+		fprintf(stderr, "Unable to create minimal perfect hash function\n");
+		fclose(mphf_fd);
+		cmph_io_struct_vector_adapter_destroy(source);
+		return 1;
+	}
+	cmph_dump(hash, mphf_fd);
+	cmph_destroy(hash);
+	fclose(mphf_fd);
+
+	//Find key
+	mphf_fd = fopen("temp_struct_vector.mph", "rb");
+	if (mphf_fd == NULL) {
+		perror("temp_struct_vector.mph");
+		cmph_io_struct_vector_adapter_destroy(source);
+		return 1;
+	}
+	hash = cmph_load(mphf_fd);
+	if (hash == NULL) {
+		fprintf(stderr, "Unable to load minimal perfect hash function\n");
+		fclose(mphf_fd);
+		cmph_io_struct_vector_adapter_destroy(source);
+		return 1;
+	}
+	while (i < nkeys) {
+		const char *key = vector[i].key;
+		unsigned int id = cmph_search(hash, key, (cmph_uint32)sizeof(vector[i].key));
+		fprintf(stderr, "key:%s -- hash:%u\n", key, id);
+		i++;
+	}
+
+	//Destroy hash
+	cmph_destroy(hash);
+	cmph_io_struct_vector_adapter_destroy(source);
+	fclose(mphf_fd);
+	return 0;
+}
diff --git a/examples/vector_adapter_ex1.c b/examples/vector_adapter_ex1.c
index f25ceef..bac4390 100755
--- a/examples/vector_adapter_ex1.c
+++ b/examples/vector_adapter_ex1.c
@@ -4,11 +4,11 @@ int main(int argc, char **argv)
 {
 
     // Creating a filled vector
-    unsigned int i = 0;
+	unsigned int i = 0;
     const char *vector[] = {"aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc", "dddddddddd", "eeeeeeeeee",
-        "ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"};
+		"ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"};
     unsigned int nkeys = 10;
-    FILE* mphf_fd = fopen("temp.mph", "w");
+	FILE* mphf_fd = fopen("temp.mph", "w");
     // Source of keys
     cmph_io_adapter_t *source = cmph_io_vector_adapter((char **)vector, nkeys);
 
@@ -18,23 +18,23 @@ int main(int argc, char **argv)
     cmph_config_set_mphf_fd(config, mphf_fd);
     cmph_t *hash = cmph_new(config);
     cmph_config_destroy(config);
-    cmph_dump(hash, mphf_fd);
+	cmph_dump(hash, mphf_fd);
     cmph_destroy(hash);
     fclose(mphf_fd);
 
     //Find key
-    mphf_fd = fopen("temp.mph", "r");
-    hash = cmph_load(mphf_fd);
-    while (i < nkeys) {
-        const char *key = vector[i];
+	mphf_fd = fopen("temp.mph", "r");
+	hash = cmph_load(mphf_fd);
+	while (i < nkeys) {
+		const char *key = vector[i];
         unsigned int id = cmph_search(hash, key, strlen(key));
         fprintf(stderr, "key:%s -- hash:%u\n", key, id);
-        i++;
-    }
-    
+		i++;
+	}
+
     //Destroy hash
     cmph_destroy(hash);
     cmph_io_vector_adapter_destroy(source);
-    fclose(mphf_fd);
+	fclose(mphf_fd);
     return 0;
 }
diff --git a/src/cmph.c b/src/cmph.c
index 4768760..9fd260a 100644
--- a/src/cmph.c
+++ b/src/cmph.c
@@ -23,9 +23,37 @@ typedef struct
 	cmph_uint32 position; // access position when data is a vector
 } cmph_vector_t;
 
+
+
+/**
+ * Key source state for a vector of fixed-size structs.
+ *
+ * Each key is a fixed-length field embedded in every element, e.g. the
+ * fieldB's in a vector of struct rec where struct rec is defined as:
+ *   struct rec {
+ *      fieldA;
+ *      fieldB;
+ *      fieldC;
+ *   }
+ * Key number i is the key_len bytes starting at
+ * vector + i * struct_size + key_offset.
+ */
+typedef struct
+{
+	void *vector;             /* Pointer to the vector of struct */
+	cmph_uint32 position;     /* Index of the next element to read */
+	cmph_uint32 struct_size;  /* The size of the struct, in bytes */
+	cmph_uint32 key_offset;   /* The byte offset of the key in the struct */
+	cmph_uint32 key_len;      /* The length of the key, in bytes */
+} cmph_struct_vector_t;
+
+
 static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys);
 static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source);
 
+static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys);
+static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source);
+
 static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
 {
 	FILE *fd = (FILE *)data;
@@ -63,6 +91,24 @@ static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
 
 }
 
+/* Copies the key of the current element into a buffer obtained from malloc
+ * and stored in *key, writes its length to *keylen, then advances to the
+ * next element. Returns the key length. No bounds check is made here
+ * against the number of keys. */
+static int key_struct_vector_read(void *data, char **key, cmph_uint32 *keylen)
+{
+	cmph_struct_vector_t *sv = (cmph_struct_vector_t *)data;
+	const char *base = (const char *)sv->vector;
+	/* Compute the byte offset in size_t: the product of two cmph_uint32
+	 * values would otherwise wrap at 32 bits. */
+	size_t offset = (size_t)sv->position * sv->struct_size + sv->key_offset;
+	*keylen = sv->key_len;
+	*key = (char *)malloc(*keylen);
+	assert(*key != NULL || *keylen == 0);
+	memcpy(*key, base + offset, *keylen);
+	sv->position = sv->position + 1;
+	return (int)(*keylen);
+}
+
 static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
 {
 	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
@@ -92,6 +138,13 @@ static void key_nlfile_rewind(void *data)
 	rewind(fd);
 }
 
+/* Restarts iteration at the first element of the struct vector. */
+static void key_struct_vector_rewind(void *data)
+{
+	cmph_struct_vector_t *sv = (cmph_struct_vector_t *)data;
+	sv->position = 0;
+}
+
 static void key_vector_rewind(void *data)
 {
 	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
@@ -148,6 +201,35 @@ void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source)
 	free(key_source);
 }
 
+
+/* Builds the adapter and iteration state for a vector of structs. The
+ * vector is neither copied nor freed by the adapter: only its address and
+ * layout are stored. The read/dispose/rewind callbacks are not set here. */
+static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
+{
+	cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
+	cmph_struct_vector_t * cmph_struct_vector = (cmph_struct_vector_t *)malloc(sizeof(cmph_struct_vector_t));
+	assert(key_source);
+	assert(cmph_struct_vector);
+	cmph_struct_vector->vector = vector;
+	cmph_struct_vector->position = 0;
+	cmph_struct_vector->struct_size = struct_size;
+	cmph_struct_vector->key_offset = key_offset;
+	cmph_struct_vector->key_len = key_len;
+	key_source->data = (void *)cmph_struct_vector;
+	key_source->nkeys = nkeys;
+	return key_source;
+}
+
+/* Frees the adapter and its iteration state; the vector is left untouched. */
+static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source)
+{
+	cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)key_source->data;
+	cmph_struct_vector->vector = NULL;
+	free(cmph_struct_vector);
+	free(key_source);
+}
+
 static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys)
 {
 	cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
@@ -181,6 +263,24 @@ void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source)
 {
 	cmph_io_vector_destroy(key_source);
 }
+
+/* Creates a key source that reads fixed-length keys embedded in a vector of
+ * structs; release it with cmph_io_struct_vector_adapter_destroy(). */
+cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
+{
+	cmph_io_adapter_t * key_source = cmph_io_struct_vector_new(vector, struct_size, key_offset, key_len, nkeys);
+	key_source->read = key_struct_vector_read;
+	key_source->dispose = key_vector_dispose;
+	key_source->rewind = key_struct_vector_rewind;
+	return key_source;
+}
+
+/* Releases a key source created by cmph_io_struct_vector_adapter(). */
+void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source)
+{
+	cmph_io_struct_vector_destroy(key_source);
+}
+
 cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
 {
 	cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
diff --git a/src/cmph.h b/src/cmph.h
index df9992a..be8af27 100644
--- a/src/cmph.h
+++ b/src/cmph.h
@@ -37,6 +37,11 @@ void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source);
 cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys);
 void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source);
 
+/** Key source over a vector of structs: key i is the key_len bytes found at
+ *  vector + i * struct_size + key_offset. */
+cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys);
+void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source);
+
 /** Hash configuration API **/
 cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);
 void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);