strlen function removed from BRZ algorithm

This commit is contained in:
fc_botelho 2006-07-28 22:36:50 +00:00
parent 32829274c4
commit 6bc95ace44
8 changed files with 98 additions and 73 deletions

View File

@ -142,7 +142,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c)
used_edges = (cmph_uint8 *)malloc(bmz8->m/8 + 1);
memset(used_edges, 0, bmz8->m/8 + 1);
free(bmz8->g);
bmz8->g = (cmph_uint32 *)calloc(bmz8->n, sizeof(cmph_uint8));
bmz8->g = (cmph_uint8 *)calloc(bmz8->n, sizeof(cmph_uint8));
assert(bmz8->g);
for (i = 0; i < bmz8->n; ++i) // critical nodes
{

View File

@ -19,7 +19,7 @@
static int brz_gen_mphf(cmph_config_t *mph);
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys);
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
brz_config_data_t *brz_config_new()
{
@ -209,7 +209,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_uint32 keylen;
cmph_uint32 cur_bucket = 0;
cmph_uint8 nkeys_vd = 0;
char ** keys_vd = NULL;
cmph_uint8 ** keys_vd = NULL;
mph->key_source->rewind(mph->key_source->data);
DEBUGP("Generating graphs from %u keys\n", brz->m);
@ -219,7 +219,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
mph->key_source->read(mph->key_source->data, &key, &keylen);
/* Buffers management */
if (memory_usage + keylen + 1 > brz->memory_availability) // flush buffers
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
{
if(mph->verbosity)
{
@ -241,11 +241,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + memory_usage));
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
keys_index[buckets_size[h0]] = memory_usage;
buckets_size[h0]++;
memory_usage = memory_usage + keylen1 + 1;
memory_usage += keylen1 + sizeof(keylen1);
}
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
@ -254,8 +254,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd);
}
nkeys_in_buffer = 0;
memory_usage = 0;
@ -264,9 +264,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
free(keys_index);
fclose(tmp_fd);
}
memcpy(buffer + memory_usage, key, keylen + 1);
memory_usage = memory_usage + keylen + 1;
memcpy(buffer + memory_usage, &keylen, sizeof(keylen));
memcpy(buffer + memory_usage + sizeof(keylen), key, keylen);
memory_usage += keylen + sizeof(keylen);
h0 = hash(brz->h0, key, keylen) % brz->k;
if ((brz->size[h0] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))
{
free(buffer);
@ -278,7 +280,6 @@ static int brz_gen_mphf(cmph_config_t *mph)
nkeys_in_buffer++;
mph->key_source->dispose(mph->key_source->data, key, keylen);
}
if (memory_usage != 0) // flush buffers
{
if(mph->verbosity)
@ -300,11 +301,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + memory_usage));
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
keys_index[buckets_size[h0]] = memory_usage;
buckets_size[h0]++;
memory_usage = memory_usage + keylen1 + 1;
memory_usage += keylen1 + sizeof(keylen1);
}
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
@ -313,8 +314,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd);
}
nkeys_in_buffer = 0;
memory_usage = 0;
@ -352,50 +353,46 @@ static int brz_gen_mphf(cmph_config_t *mph)
buffer_manager_open(buff_manager, i, filename);
free(filename);
filename = NULL;
key = (char *)buffer_manager_read_key(buff_manager, i);
keylen = strlen(key);
h0 = hash(brz->h0, key, keylen) % brz->k;
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1);
free(key);
buffer_merge[i] = (cmph_uint8 *)key;
key = NULL; //transfer memory ownership
}
e = 0;
keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *));
keys_vd = (cmph_uint8 **)calloc(MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
nkeys_vd = 0;
while(e < brz->m)
{
i = brz_min_index(buffer_h0, nflushes);
cur_bucket = buffer_h0[i];
key = (char *)buffer_manager_read_key(buff_manager, i);
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
if(key)
{
while(key)
{
keylen = strlen(key);
h0 = hash(brz->h0, key, keylen) % brz->k;
//keylen = strlen(key);
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
if (h0 != buffer_h0[i]) break;
keys_vd[nkeys_vd++] = key;
keys_vd[nkeys_vd++] = (cmph_uint8 *)key;
key = NULL; //transfer memory ownership
e++;
key = (char *)buffer_manager_read_key(buff_manager, i);
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
}
if (key)
{
assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
keys_vd[nkeys_vd++] = buffer_merge[i];
buffer_merge[i] = NULL; //transfer memory ownership
e++;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1);
free(key);
buffer_merge[i] = (cmph_uint8 *)key;
}
}
if(!key)
{
assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
keys_vd[nkeys_vd++] = buffer_merge[i];
buffer_merge[i] = NULL; //transfer memory ownership
e++;
buffer_h0[i] = UINT_MAX;
@ -410,7 +407,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
char *bufmphf = NULL;
cmph_uint32 buflenmphf = 0;
// Source of keys
source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
config = cmph_config_new(source);
cmph_config_set_algo(config, CMPH_BMZ8);
cmph_config_set_graphsize(config, brz->c);
@ -424,8 +421,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_config_destroy(config);
brz_destroy_keys_vd(keys_vd, nkeys_vd);
cmph_destroy(mphf_tmp);
cmph_io_vector_adapter_destroy(source);
cmph_io_byte_vector_adapter_destroy(source);
nkeys_vd = 0;
}
}
@ -447,7 +443,7 @@ static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
return min_index;
}
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys)
{
cmph_uint8 i;
for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;}
@ -465,7 +461,6 @@ static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf,
hash_state_dump(bmzf->hashes[1], &bufh2, &buflenh2);
*buflen = buflenh1 + buflenh2 + n + 2*sizeof(cmph_uint32);
buf = (char *)malloc(*buflen);
//fprintf(stderr,"entrei passei\n");
memcpy(buf, &buflenh1, sizeof(cmph_uint32));
memcpy(buf+sizeof(cmph_uint32), bufh1, buflenh1);
memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32));

View File

@ -2,6 +2,7 @@
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
struct __buffer_entry_t
{
@ -51,24 +52,40 @@ void buffer_entry_load(buffer_entry_t * buffer_entry)
buffer_entry->pos = 0;
}
// Reads the next key record from buffer_entry and returns it in a newly
// malloc'd buffer; returns NULL once eof is set and every loaded byte has
// been consumed.
// NOTE(review): this span appears to be a diff hunk shown without +/- markers,
// so the signature and body lines of two versions are interleaved (a
// byte-at-a-time read up to '\0', and a length-prefixed read that reports the
// key length through keylen) -- confirm against the real file before editing.
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry)
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen)
{
cmph_uint8 * buf = (cmph_uint8 *)malloc(BUFSIZ);
cmph_uint32 buf_pos = 0;
cmph_uint8 c;
while(1)
{
cmph_uint8 * buf = NULL;
// lacked_bytes: bytes still to be copied for the field being read; it starts
// as the size of the length prefix.
cmph_uint32 lacked_bytes = sizeof(*keylen);
// copied_bytes: bytes of the current field already copied before a reload.
cmph_uint32 copied_bytes = 0;
if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end
{
free(buf);
return NULL;
}
// Byte-at-a-time path: reload when the loaded bytes are exhausted, append one
// byte to buf, stop after copying a '\0', and grow buf in BUFSIZ steps.
if(buffer_entry->pos == buffer_entry->nbytes) buffer_entry_load(buffer_entry);
c = buffer_entry->buff[(buffer_entry->pos)++];
buf[buf_pos++] = c;
if(c == '\0') break;
if(buf_pos % BUFSIZ == 0) buf = (cmph_uint8 *)realloc(buf, buf_pos + BUFSIZ);
// Length-prefixed path, step 1: read sizeof(*keylen) bytes into *keylen.
// If the prefix straddles the end of the loaded bytes, copy the part that is
// available, then reload and copy the remainder below.
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
{
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
if (copied_bytes != 0) memcpy(keylen, buffer_entry->buff + buffer_entry->pos, copied_bytes);
// buffer_entry_load() finishes by resetting pos to 0, so the copy below
// starts at the beginning of the freshly loaded bytes.
buffer_entry_load(buffer_entry);
}
// NOTE(review): keylen is a cmph_uint32 *, so keylen + copied_bytes advances
// by copied_bytes * sizeof(cmph_uint32) bytes rather than copied_bytes bytes.
// When copied_bytes != 0 this writes outside *keylen; a byte-pointer cast
// looks intended here -- confirm.
memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes);
buffer_entry->pos += lacked_bytes;
// Step 2: reuse the counters for the key payload of *keylen bytes.
lacked_bytes = *keylen;
copied_bytes = 0;
// The returned buffer holds the length prefix followed by the key bytes.
// NOTE(review): the malloc result is used without a NULL check.
buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
memcpy(buf, keylen, sizeof(*keylen));
// Same straddling logic as for the prefix, applied to the key bytes.
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) {
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
if (copied_bytes != 0) {
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, copied_bytes);
}
buffer_entry_load(buffer_entry);
}
// NOTE(review): only one reload is attempted; this copy assumes the remaining
// lacked_bytes are all present in the loaded bytes -- confirm.
memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes);
buffer_entry->pos += lacked_bytes;
return buf;
}

View File

@ -9,6 +9,6 @@ buffer_entry_t * buffer_entry_new(cmph_uint32 capacity);
void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity);
cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry);
void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename);
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry);
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen);
void buffer_entry_destroy(buffer_entry_t * buffer_entry);
#endif

View File

@ -35,20 +35,18 @@ void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, c
buffer_entry_open(buffer_manager->buffer_entries[index], filename);
}
// Reads the next key from the buffer entry at position index and returns what
// buffer_entry_read_key() returns (NULL when that entry yields no more keys).
// NOTE(review): this span appears to be a diff hunk shown without +/- markers;
// the one-line signature/call without keylen and the one with keylen are two
// versions of the same line -- confirm against the real file.
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index)
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen)
{
cmph_uint8 * key = NULL;
// If a capacity value is stored in memory_avail_list, pop it and add it to
// the capacity of the entry being read.
if (buffer_manager->pos_avail_list >= 0 ) // recovering memory
{
cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]) + buffer_manager->memory_avail_list[(buffer_manager->pos_avail_list)--];
buffer_entry_set_capacity(buffer_manager->buffer_entries[index], new_capacity);
//fprintf(stderr, "recovering memory\n");
}
key = buffer_entry_read_key(buffer_manager->buffer_entries[index]);
key = buffer_entry_read_key(buffer_manager->buffer_entries[index], keylen);
// A NULL key means this entry returned nothing: push its capacity onto
// memory_avail_list so a later call can hand it to another entry.
if (key == NULL) // storing memory to be recovered
{
buffer_manager->memory_avail_list[++(buffer_manager->pos_avail_list)] = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]);
//fprintf(stderr, "storing memory to be recovered\n");
}
return key;
}

View File

@ -7,6 +7,6 @@ typedef struct __buffer_manager_t buffer_manager_t;
buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries);
void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename);
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index);
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen);
void buffer_manager_destroy(buffer_manager_t * buffer_manager);
#endif

View File

@ -47,22 +47,22 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
return *keylen;
}
/**
 * Read callback for a vector of length-prefixed byte keys.
 *
 * Each element of the vector points to a cmph_uint32 key length followed
 * immediately by that many key bytes. The length is copied into *keylen and
 * the key bytes are copied into a freshly malloc'd buffer stored in *key;
 * the buffer is not NUL-terminated.
 *
 * @param data   the cmph_vector_t holding the vector and the read position
 * @param key    receives the newly allocated copy of the key bytes
 * @param keylen receives the key length in bytes
 * @return *keylen converted to int, or -1 if the key buffer could not be
 *         allocated (in which case *key is NULL and the read position is
 *         left unchanged)
 */
static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
	cmph_uint8 **keys_vd = (cmph_uint8 **)cmph_vector->vector;
	const cmph_uint8 *entry = keys_vd[cmph_vector->position];

	/* The length prefix may be unaligned inside the byte buffer, so it is
	 * copied out with memcpy rather than read through a cast pointer. */
	memcpy(keylen, entry, sizeof(*keylen));

	*key = (char *)malloc(*keylen);
	/* malloc(0) may legitimately return NULL, so only a NULL result for a
	 * non-empty key is treated as an allocation failure. */
	if (*key == NULL && *keylen != 0)
	{
		return -1;
	}
	if (*keylen != 0)
	{
		memcpy(*key, entry + sizeof(*keylen), *keylen);
	}
	cmph_vector->position = cmph_vector->position + 1;
	return *keylen;
}
static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
/*
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
char **keys_vd = (char **)cmph_vector->vector;
if (keys_vd + cmph_vector->position == NULL) return -1;
*keylen = strlen(*(keys_vd + cmph_vector->position));
*key = (char *)malloc(*keylen + 1);
strcpy(*key, *(keys_vd + cmph_vector->position));
cmph_vector->position = cmph_vector->position + 1;
*/
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
char **keys_vd = (char **)cmph_vector->vector;
// if (keys_vd + cmph_vector->position == NULL) return -1;
*keylen = strlen(keys_vd[cmph_vector->position]);
*key = (char *)malloc(*keylen + 1);
strcpy(*key, keys_vd[cmph_vector->position]);
@ -165,6 +165,18 @@ static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source)
free(key_source);
}
/**
 * Creates a key source over an in-memory vector of byte keys.
 *
 * The adapter is built by cmph_io_vector_new() and reads keys through
 * key_byte_vector_read(); dispose and rewind use the same callbacks as the
 * plain vector source (key_vector_dispose, key_vector_rewind).
 *
 * @param vector array of key pointers handed to cmph_io_vector_new()
 * @param nkeys  number of keys in vector
 * @return the adapter returned by cmph_io_vector_new(), with its callbacks set
 */
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = cmph_io_vector_new(vector, nkeys);

	/* Only the read callback differs from the string-vector adapter. */
	adapter->rewind = key_vector_rewind;
	adapter->dispose = key_vector_dispose;
	adapter->read = key_byte_vector_read;

	return adapter;
}
/**
 * Destroys a key source created by cmph_io_byte_vector_adapter().
 *
 * Delegates to cmph_io_vector_destroy(), the same teardown used for the
 * other vector-based sources.
 *
 * @param key_source the adapter to destroy
 */
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	/* Byte-vector sources are built by cmph_io_vector_new(), so the shared
	 * vector teardown applies unchanged. */
	cmph_io_vector_destroy(key_source);
}
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
{
cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);

View File

@ -34,6 +34,9 @@ void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source);
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys);
void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source);
/**
 * Key source over an in-memory vector of length-prefixed byte keys: each
 * vector[i] points to a cmph_uint32 key length followed by that many key
 * bytes (keys are not NUL-terminated). nkeys is the number of keys.
 */
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys);
/**
 * Destroys an adapter created by cmph_io_byte_vector_adapter().
 * NOTE(review): whether the key vector itself is released is not visible
 * here -- confirm before relying on it.
 */
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source);
/** Hash configuration API **/
cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);