From 6bc95ace44fc5266ed7b42a4ee6bcd351f716d2f Mon Sep 17 00:00:00 2001 From: fc_botelho Date: Fri, 28 Jul 2006 22:36:50 +0000 Subject: [PATCH] strlen function removed from BRZ algorithm --- src/bmz8.c | 2 +- src/brz.c | 73 +++++++++++++++++++++----------------------- src/buffer_entry.c | 47 +++++++++++++++++++--------- src/buffer_entry.h | 2 +- src/buffer_manager.c | 6 ++-- src/buffer_manager.h | 2 +- src/cmph.c | 36 ++++++++++++++-------- src/cmph.h | 3 ++ 8 files changed, 98 insertions(+), 73 deletions(-) diff --git a/src/bmz8.c b/src/bmz8.c index db4b275..f0690dc 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -142,7 +142,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c) used_edges = (cmph_uint8 *)malloc(bmz8->m/8 + 1); memset(used_edges, 0, bmz8->m/8 + 1); free(bmz8->g); - bmz8->g = (cmph_uint32 *)calloc(bmz8->n, sizeof(cmph_uint8)); + bmz8->g = (cmph_uint8 *)calloc(bmz8->n, sizeof(cmph_uint8)); assert(bmz8->g); for (i = 0; i < bmz8->n; ++i) // critical nodes { diff --git a/src/brz.c b/src/brz.c index a7622ff..dad31d1 100755 --- a/src/brz.c +++ b/src/brz.c @@ -19,7 +19,7 @@ static int brz_gen_mphf(cmph_config_t *mph); static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n); -static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); +static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys); static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen); brz_config_data_t *brz_config_new() { @@ -209,7 +209,7 @@ static int brz_gen_mphf(cmph_config_t *mph) cmph_uint32 keylen; cmph_uint32 cur_bucket = 0; cmph_uint8 nkeys_vd = 0; - char ** keys_vd = NULL; + cmph_uint8 ** keys_vd = NULL; mph->key_source->rewind(mph->key_source->data); DEBUGP("Generating graphs from %u keys\n", brz->m); @@ -219,7 +219,7 @@ static int brz_gen_mphf(cmph_config_t *mph) mph->key_source->read(mph->key_source->data, &key, &keylen); /* Buffers management */ - if (memory_usage + keylen + 1 > 
brz->memory_availability) // flush buffers + if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers { if(mph->verbosity) { @@ -241,11 +241,11 @@ static int brz_gen_mphf(cmph_config_t *mph) keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32)); for(i = 0; i < nkeys_in_buffer; i++) { - keylen1 = strlen((char *)(buffer + memory_usage)); - h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k; + memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1)); + h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k; keys_index[buckets_size[h0]] = memory_usage; buckets_size[h0]++; - memory_usage = memory_usage + keylen1 + 1; + memory_usage += keylen1 + sizeof(keylen1); } filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char)); sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); @@ -254,8 +254,8 @@ static int brz_gen_mphf(cmph_config_t *mph) filename = NULL; for(i = 0; i < nkeys_in_buffer; i++) { - keylen1 = strlen((char *)(buffer + keys_index[i])) + 1; - fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd); + memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1)); + fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd); } nkeys_in_buffer = 0; memory_usage = 0; @@ -264,9 +264,11 @@ static int brz_gen_mphf(cmph_config_t *mph) free(keys_index); fclose(tmp_fd); } - memcpy(buffer + memory_usage, key, keylen + 1); - memory_usage = memory_usage + keylen + 1; + memcpy(buffer + memory_usage, &keylen, sizeof(keylen)); + memcpy(buffer + memory_usage + sizeof(keylen), key, keylen); + memory_usage += keylen + sizeof(keylen); h0 = hash(brz->h0, key, keylen) % brz->k; + if ((brz->size[h0] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0])) { free(buffer); @@ -278,7 +280,6 @@ static int brz_gen_mphf(cmph_config_t *mph) nkeys_in_buffer++; mph->key_source->dispose(mph->key_source->data, key, keylen); } - if 
(memory_usage != 0) // flush buffers { if(mph->verbosity) @@ -300,11 +301,11 @@ static int brz_gen_mphf(cmph_config_t *mph) keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32)); for(i = 0; i < nkeys_in_buffer; i++) { - keylen1 = strlen((char *)(buffer + memory_usage)); - h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k; + memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1)); + h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k; keys_index[buckets_size[h0]] = memory_usage; buckets_size[h0]++; - memory_usage = memory_usage + keylen1 + 1; + memory_usage += keylen1 + sizeof(keylen1); } filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char)); sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); @@ -313,8 +314,8 @@ static int brz_gen_mphf(cmph_config_t *mph) filename = NULL; for(i = 0; i < nkeys_in_buffer; i++) { - keylen1 = strlen((char *)(buffer + keys_index[i])) + 1; - fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd); + memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1)); + fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd); } nkeys_in_buffer = 0; memory_usage = 0; @@ -352,50 +353,46 @@ static int brz_gen_mphf(cmph_config_t *mph) buffer_manager_open(buff_manager, i, filename); free(filename); filename = NULL; - key = (char *)buffer_manager_read_key(buff_manager, i); - keylen = strlen(key); - h0 = hash(brz->h0, key, keylen) % brz->k; + key = (char *)buffer_manager_read_key(buff_manager, i, &keylen); + h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k; buffer_h0[i] = h0; - buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8)); - memcpy(buffer_merge[i], key, keylen + 1); - free(key); + buffer_merge[i] = (cmph_uint8 *)key; + key = NULL; //transfer memory ownership } e = 0; - keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *)); + keys_vd = (cmph_uint8 **)calloc(MAX_BUCKET_SIZE, sizeof(cmph_uint8 *)); nkeys_vd = 0; 
while(e < brz->m) { i = brz_min_index(buffer_h0, nflushes); cur_bucket = buffer_h0[i]; - key = (char *)buffer_manager_read_key(buff_manager, i); + key = (char *)buffer_manager_read_key(buff_manager, i, &keylen); if(key) { while(key) { - keylen = strlen(key); - h0 = hash(brz->h0, key, keylen) % brz->k; - if (h0 != buffer_h0[i]) break; - keys_vd[nkeys_vd++] = key; + //keylen = strlen(key); + h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k; + if (h0 != buffer_h0[i]) break; + keys_vd[nkeys_vd++] = (cmph_uint8 *)key; key = NULL; //transfer memory ownership e++; - key = (char *)buffer_manager_read_key(buff_manager, i); + key = (char *)buffer_manager_read_key(buff_manager, i, &keylen); } if (key) { assert(nkeys_vd < brz->size[cur_bucket]); - keys_vd[nkeys_vd++] = (char *)buffer_merge[i]; + keys_vd[nkeys_vd++] = buffer_merge[i]; buffer_merge[i] = NULL; //transfer memory ownership e++; buffer_h0[i] = h0; - buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8)); - memcpy(buffer_merge[i], key, keylen + 1); - free(key); + buffer_merge[i] = (cmph_uint8 *)key; } } if(!key) { assert(nkeys_vd < brz->size[cur_bucket]); - keys_vd[nkeys_vd++] = (char *)buffer_merge[i]; + keys_vd[nkeys_vd++] = buffer_merge[i]; buffer_merge[i] = NULL; //transfer memory ownership e++; buffer_h0[i] = UINT_MAX; @@ -410,7 +407,7 @@ static int brz_gen_mphf(cmph_config_t *mph) char *bufmphf = NULL; cmph_uint32 buflenmphf = 0; // Source of keys - source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd); + source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd); config = cmph_config_new(source); cmph_config_set_algo(config, CMPH_BMZ8); cmph_config_set_graphsize(config, brz->c); @@ -424,8 +421,7 @@ static int brz_gen_mphf(cmph_config_t *mph) cmph_config_destroy(config); brz_destroy_keys_vd(keys_vd, nkeys_vd); cmph_destroy(mphf_tmp); - cmph_io_vector_adapter_destroy(source); - + cmph_io_byte_vector_adapter_destroy(source); nkeys_vd = 0; } } @@ -447,7 +443,7 @@ static 
cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n) return min_index; } -static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys) +static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys) { cmph_uint8 i; for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;} @@ -465,7 +461,6 @@ static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, hash_state_dump(bmzf->hashes[1], &bufh2, &buflenh2); *buflen = buflenh1 + buflenh2 + n + 2*sizeof(cmph_uint32); buf = (char *)malloc(*buflen); - //fprintf(stderr,"entrei passei\n"); memcpy(buf, &buflenh1, sizeof(cmph_uint32)); memcpy(buf+sizeof(cmph_uint32), bufh1, buflenh1); memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32)); diff --git a/src/buffer_entry.c b/src/buffer_entry.c index bd9f82d..25152ea 100644 --- a/src/buffer_entry.c +++ b/src/buffer_entry.c @@ -2,6 +2,7 @@ #include #include #include +#include struct __buffer_entry_t { @@ -51,24 +52,40 @@ void buffer_entry_load(buffer_entry_t * buffer_entry) buffer_entry->pos = 0; } -cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry) +cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen) { - cmph_uint8 * buf = (cmph_uint8 *)malloc(BUFSIZ); - cmph_uint32 buf_pos = 0; - cmph_uint8 c; - while(1) + cmph_uint8 * buf = NULL; + cmph_uint32 lacked_bytes = sizeof(*keylen); + cmph_uint32 copied_bytes = 0; + if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end { - if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end - { - free(buf); - return NULL; - } - if(buffer_entry->pos == buffer_entry->nbytes) buffer_entry_load(buffer_entry); - c = buffer_entry->buff[(buffer_entry->pos)++]; - buf[buf_pos++] = c; - if(c == '\0') break; - if(buf_pos % BUFSIZ == 0) buf = (cmph_uint8 *)realloc(buf, buf_pos + BUFSIZ); + free(buf); + return NULL; } + if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) + { + copied_bytes 
= buffer_entry->nbytes - buffer_entry->pos; + lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes; + if (copied_bytes != 0) memcpy(keylen, buffer_entry->buff + buffer_entry->pos, copied_bytes); + buffer_entry_load(buffer_entry); + } + memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes); + buffer_entry->pos += lacked_bytes; + + lacked_bytes = *keylen; + copied_bytes = 0; + buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen)); + memcpy(buf, keylen, sizeof(*keylen)); + if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) { + copied_bytes = buffer_entry->nbytes - buffer_entry->pos; + lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes; + if (copied_bytes != 0) { + memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, copied_bytes); + } + buffer_entry_load(buffer_entry); + } + memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes); + buffer_entry->pos += lacked_bytes; return buf; } diff --git a/src/buffer_entry.h b/src/buffer_entry.h index 6f94924..62102ba 100644 --- a/src/buffer_entry.h +++ b/src/buffer_entry.h @@ -9,6 +9,6 @@ buffer_entry_t * buffer_entry_new(cmph_uint32 capacity); void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity); cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry); void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename); -cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry); +cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen); void buffer_entry_destroy(buffer_entry_t * buffer_entry); #endif diff --git a/src/buffer_manager.c b/src/buffer_manager.c index a23e44d..3c20490 100644 --- a/src/buffer_manager.c +++ b/src/buffer_manager.c @@ -35,20 +35,18 @@ void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, c buffer_entry_open(buffer_manager->buffer_entries[index], filename); } 
-cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index) +cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen) { cmph_uint8 * key = NULL; if (buffer_manager->pos_avail_list >= 0 ) // recovering memory { cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]) + buffer_manager->memory_avail_list[(buffer_manager->pos_avail_list)--]; buffer_entry_set_capacity(buffer_manager->buffer_entries[index], new_capacity); - //fprintf(stderr, "recovering memory\n"); } - key = buffer_entry_read_key(buffer_manager->buffer_entries[index]); + key = buffer_entry_read_key(buffer_manager->buffer_entries[index], keylen); if (key == NULL) // storing memory to be recovered { buffer_manager->memory_avail_list[++(buffer_manager->pos_avail_list)] = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]); - //fprintf(stderr, "storing memory to be recovered\n"); } return key; } diff --git a/src/buffer_manager.h b/src/buffer_manager.h index daa2efc..af99c20 100644 --- a/src/buffer_manager.h +++ b/src/buffer_manager.h @@ -7,6 +7,6 @@ typedef struct __buffer_manager_t buffer_manager_t; buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries); void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename); -cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index); +cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen); void buffer_manager_destroy(buffer_manager_t * buffer_manager); #endif diff --git a/src/cmph.c b/src/cmph.c index 38fda0e..eb4bf5e 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -47,22 +47,22 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen) return *keylen; } +static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen) +{ + cmph_vector_t *cmph_vector = 
(cmph_vector_t *)data; + cmph_uint8 **keys_vd = (cmph_uint8 **)cmph_vector->vector; + memcpy(keylen, keys_vd[cmph_vector->position], sizeof(*keylen)); + *key = (char *)malloc(*keylen); + memcpy(*key, keys_vd[cmph_vector->position] + sizeof(*keylen), *keylen); + cmph_vector->position = cmph_vector->position + 1; + return *keylen; + +} + static int key_vector_read(void *data, char **key, cmph_uint32 *keylen) { -/* - cmph_vector_t *cmph_vector = (cmph_vector_t *)data; - char **keys_vd = (char **)cmph_vector->vector; - - if (keys_vd + cmph_vector->position == NULL) return -1; - *keylen = strlen(*(keys_vd + cmph_vector->position)); - *key = (char *)malloc(*keylen + 1); - strcpy(*key, *(keys_vd + cmph_vector->position)); - cmph_vector->position = cmph_vector->position + 1; -*/ cmph_vector_t *cmph_vector = (cmph_vector_t *)data; char **keys_vd = (char **)cmph_vector->vector; - -// if (keys_vd + cmph_vector->position == NULL) return -1; *keylen = strlen(keys_vd[cmph_vector->position]); *key = (char *)malloc(*keylen + 1); strcpy(*key, keys_vd[cmph_vector->position]); @@ -165,6 +165,18 @@ static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source) free(key_source); } +cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys) +{ + cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys); + key_source->read = key_byte_vector_read; + key_source->dispose = key_vector_dispose; + key_source->rewind = key_vector_rewind; + return key_source; +} +void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source) +{ + cmph_io_vector_destroy(key_source); +} cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys) { cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys); diff --git a/src/cmph.h b/src/cmph.h index 0d42bcb..235a1e2 100644 --- a/src/cmph.h +++ b/src/cmph.h @@ -34,6 +34,9 @@ void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source); cmph_io_adapter_t *cmph_io_vector_adapter(char 
** vector, cmph_uint32 nkeys); void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source); +cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys); +void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source); + /** Hash configuration API **/ cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source); void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);