strlen fuction removed from BRZ algorithm
This commit is contained in:
parent
9d762c4bc0
commit
e75eb4b616
@ -142,7 +142,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c)
|
||||
used_edges = (cmph_uint8 *)malloc(bmz8->m/8 + 1);
|
||||
memset(used_edges, 0, bmz8->m/8 + 1);
|
||||
free(bmz8->g);
|
||||
bmz8->g = (cmph_uint32 *)calloc(bmz8->n, sizeof(cmph_uint8));
|
||||
bmz8->g = (cmph_uint8 *)calloc(bmz8->n, sizeof(cmph_uint8));
|
||||
assert(bmz8->g);
|
||||
for (i = 0; i < bmz8->n; ++i) // critical nodes
|
||||
{
|
||||
|
71
src/brz.c
71
src/brz.c
@ -19,7 +19,7 @@
|
||||
|
||||
static int brz_gen_mphf(cmph_config_t *mph);
|
||||
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
|
||||
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
|
||||
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys);
|
||||
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
|
||||
brz_config_data_t *brz_config_new()
|
||||
{
|
||||
@ -209,7 +209,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
cmph_uint32 keylen;
|
||||
cmph_uint32 cur_bucket = 0;
|
||||
cmph_uint8 nkeys_vd = 0;
|
||||
char ** keys_vd = NULL;
|
||||
cmph_uint8 ** keys_vd = NULL;
|
||||
|
||||
mph->key_source->rewind(mph->key_source->data);
|
||||
DEBUGP("Generating graphs from %u keys\n", brz->m);
|
||||
@ -219,7 +219,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
mph->key_source->read(mph->key_source->data, &key, &keylen);
|
||||
|
||||
/* Buffers management */
|
||||
if (memory_usage + keylen + 1 > brz->memory_availability) // flush buffers
|
||||
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
|
||||
{
|
||||
if(mph->verbosity)
|
||||
{
|
||||
@ -241,11 +241,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
{
|
||||
keylen1 = strlen((char *)(buffer + memory_usage));
|
||||
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
|
||||
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
|
||||
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
|
||||
keys_index[buckets_size[h0]] = memory_usage;
|
||||
buckets_size[h0]++;
|
||||
memory_usage = memory_usage + keylen1 + 1;
|
||||
memory_usage += keylen1 + sizeof(keylen1);
|
||||
}
|
||||
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
|
||||
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
|
||||
@ -254,8 +254,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
filename = NULL;
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
{
|
||||
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
|
||||
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
|
||||
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
|
||||
fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd);
|
||||
}
|
||||
nkeys_in_buffer = 0;
|
||||
memory_usage = 0;
|
||||
@ -264,9 +264,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
free(keys_index);
|
||||
fclose(tmp_fd);
|
||||
}
|
||||
memcpy(buffer + memory_usage, key, keylen + 1);
|
||||
memory_usage = memory_usage + keylen + 1;
|
||||
memcpy(buffer + memory_usage, &keylen, sizeof(keylen));
|
||||
memcpy(buffer + memory_usage + sizeof(keylen), key, keylen);
|
||||
memory_usage += keylen + sizeof(keylen);
|
||||
h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||
|
||||
if ((brz->size[h0] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))
|
||||
{
|
||||
free(buffer);
|
||||
@ -278,7 +280,6 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
nkeys_in_buffer++;
|
||||
mph->key_source->dispose(mph->key_source->data, key, keylen);
|
||||
}
|
||||
|
||||
if (memory_usage != 0) // flush buffers
|
||||
{
|
||||
if(mph->verbosity)
|
||||
@ -300,11 +301,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
{
|
||||
keylen1 = strlen((char *)(buffer + memory_usage));
|
||||
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
|
||||
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
|
||||
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
|
||||
keys_index[buckets_size[h0]] = memory_usage;
|
||||
buckets_size[h0]++;
|
||||
memory_usage = memory_usage + keylen1 + 1;
|
||||
memory_usage += keylen1 + sizeof(keylen1);
|
||||
}
|
||||
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
|
||||
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
|
||||
@ -313,8 +314,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
filename = NULL;
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
{
|
||||
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
|
||||
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
|
||||
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
|
||||
fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd);
|
||||
}
|
||||
nkeys_in_buffer = 0;
|
||||
memory_usage = 0;
|
||||
@ -352,50 +353,46 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
buffer_manager_open(buff_manager, i, filename);
|
||||
free(filename);
|
||||
filename = NULL;
|
||||
key = (char *)buffer_manager_read_key(buff_manager, i);
|
||||
keylen = strlen(key);
|
||||
h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
|
||||
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
|
||||
buffer_h0[i] = h0;
|
||||
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
|
||||
memcpy(buffer_merge[i], key, keylen + 1);
|
||||
free(key);
|
||||
buffer_merge[i] = (cmph_uint8 *)key;
|
||||
key = NULL; //transfer memory ownership
|
||||
}
|
||||
e = 0;
|
||||
keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *));
|
||||
keys_vd = (cmph_uint8 **)calloc(MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
|
||||
nkeys_vd = 0;
|
||||
while(e < brz->m)
|
||||
{
|
||||
i = brz_min_index(buffer_h0, nflushes);
|
||||
cur_bucket = buffer_h0[i];
|
||||
key = (char *)buffer_manager_read_key(buff_manager, i);
|
||||
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
|
||||
if(key)
|
||||
{
|
||||
while(key)
|
||||
{
|
||||
keylen = strlen(key);
|
||||
h0 = hash(brz->h0, key, keylen) % brz->k;
|
||||
//keylen = strlen(key);
|
||||
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
|
||||
if (h0 != buffer_h0[i]) break;
|
||||
keys_vd[nkeys_vd++] = key;
|
||||
keys_vd[nkeys_vd++] = (cmph_uint8 *)key;
|
||||
key = NULL; //transfer memory ownership
|
||||
e++;
|
||||
key = (char *)buffer_manager_read_key(buff_manager, i);
|
||||
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
|
||||
}
|
||||
if (key)
|
||||
{
|
||||
assert(nkeys_vd < brz->size[cur_bucket]);
|
||||
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
|
||||
keys_vd[nkeys_vd++] = buffer_merge[i];
|
||||
buffer_merge[i] = NULL; //transfer memory ownership
|
||||
e++;
|
||||
buffer_h0[i] = h0;
|
||||
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
|
||||
memcpy(buffer_merge[i], key, keylen + 1);
|
||||
free(key);
|
||||
buffer_merge[i] = (cmph_uint8 *)key;
|
||||
}
|
||||
}
|
||||
if(!key)
|
||||
{
|
||||
assert(nkeys_vd < brz->size[cur_bucket]);
|
||||
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
|
||||
keys_vd[nkeys_vd++] = buffer_merge[i];
|
||||
buffer_merge[i] = NULL; //transfer memory ownership
|
||||
e++;
|
||||
buffer_h0[i] = UINT_MAX;
|
||||
@ -410,7 +407,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
char *bufmphf = NULL;
|
||||
cmph_uint32 buflenmphf = 0;
|
||||
// Source of keys
|
||||
source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
||||
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
||||
config = cmph_config_new(source);
|
||||
cmph_config_set_algo(config, CMPH_BMZ8);
|
||||
cmph_config_set_graphsize(config, brz->c);
|
||||
@ -424,8 +421,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
||||
cmph_config_destroy(config);
|
||||
brz_destroy_keys_vd(keys_vd, nkeys_vd);
|
||||
cmph_destroy(mphf_tmp);
|
||||
cmph_io_vector_adapter_destroy(source);
|
||||
|
||||
cmph_io_byte_vector_adapter_destroy(source);
|
||||
nkeys_vd = 0;
|
||||
}
|
||||
}
|
||||
@ -447,7 +443,7 @@ static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
|
||||
return min_index;
|
||||
}
|
||||
|
||||
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
|
||||
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys)
|
||||
{
|
||||
cmph_uint8 i;
|
||||
for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;}
|
||||
@ -465,7 +461,6 @@ static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf,
|
||||
hash_state_dump(bmzf->hashes[1], &bufh2, &buflenh2);
|
||||
*buflen = buflenh1 + buflenh2 + n + 2*sizeof(cmph_uint32);
|
||||
buf = (char *)malloc(*buflen);
|
||||
//fprintf(stderr,"entrei passei\n");
|
||||
memcpy(buf, &buflenh1, sizeof(cmph_uint32));
|
||||
memcpy(buf+sizeof(cmph_uint32), bufh1, buflenh1);
|
||||
memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32));
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
struct __buffer_entry_t
|
||||
{
|
||||
@ -51,24 +52,40 @@ void buffer_entry_load(buffer_entry_t * buffer_entry)
|
||||
buffer_entry->pos = 0;
|
||||
}
|
||||
|
||||
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry)
|
||||
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen)
|
||||
{
|
||||
cmph_uint8 * buf = (cmph_uint8 *)malloc(BUFSIZ);
|
||||
cmph_uint32 buf_pos = 0;
|
||||
cmph_uint8 c;
|
||||
while(1)
|
||||
{
|
||||
cmph_uint8 * buf = NULL;
|
||||
cmph_uint32 lacked_bytes = sizeof(*keylen);
|
||||
cmph_uint32 copied_bytes = 0;
|
||||
if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end
|
||||
{
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
if(buffer_entry->pos == buffer_entry->nbytes) buffer_entry_load(buffer_entry);
|
||||
c = buffer_entry->buff[(buffer_entry->pos)++];
|
||||
buf[buf_pos++] = c;
|
||||
if(c == '\0') break;
|
||||
if(buf_pos % BUFSIZ == 0) buf = (cmph_uint8 *)realloc(buf, buf_pos + BUFSIZ);
|
||||
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
|
||||
{
|
||||
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
|
||||
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
|
||||
if (copied_bytes != 0) memcpy(keylen, buffer_entry->buff + buffer_entry->pos, copied_bytes);
|
||||
buffer_entry_load(buffer_entry);
|
||||
}
|
||||
memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes);
|
||||
buffer_entry->pos += lacked_bytes;
|
||||
|
||||
lacked_bytes = *keylen;
|
||||
copied_bytes = 0;
|
||||
buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
|
||||
memcpy(buf, keylen, sizeof(*keylen));
|
||||
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) {
|
||||
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
|
||||
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
|
||||
if (copied_bytes != 0) {
|
||||
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, copied_bytes);
|
||||
}
|
||||
buffer_entry_load(buffer_entry);
|
||||
}
|
||||
memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes);
|
||||
buffer_entry->pos += lacked_bytes;
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,6 @@ buffer_entry_t * buffer_entry_new(cmph_uint32 capacity);
|
||||
void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity);
|
||||
cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry);
|
||||
void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename);
|
||||
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry);
|
||||
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen);
|
||||
void buffer_entry_destroy(buffer_entry_t * buffer_entry);
|
||||
#endif
|
||||
|
@ -35,20 +35,18 @@ void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, c
|
||||
buffer_entry_open(buffer_manager->buffer_entries[index], filename);
|
||||
}
|
||||
|
||||
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index)
|
||||
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen)
|
||||
{
|
||||
cmph_uint8 * key = NULL;
|
||||
if (buffer_manager->pos_avail_list >= 0 ) // recovering memory
|
||||
{
|
||||
cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]) + buffer_manager->memory_avail_list[(buffer_manager->pos_avail_list)--];
|
||||
buffer_entry_set_capacity(buffer_manager->buffer_entries[index], new_capacity);
|
||||
//fprintf(stderr, "recovering memory\n");
|
||||
}
|
||||
key = buffer_entry_read_key(buffer_manager->buffer_entries[index]);
|
||||
key = buffer_entry_read_key(buffer_manager->buffer_entries[index], keylen);
|
||||
if (key == NULL) // storing memory to be recovered
|
||||
{
|
||||
buffer_manager->memory_avail_list[++(buffer_manager->pos_avail_list)] = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]);
|
||||
//fprintf(stderr, "storing memory to be recovered\n");
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
@ -7,6 +7,6 @@ typedef struct __buffer_manager_t buffer_manager_t;
|
||||
|
||||
buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries);
|
||||
void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename);
|
||||
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index);
|
||||
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen);
|
||||
void buffer_manager_destroy(buffer_manager_t * buffer_manager);
|
||||
#endif
|
||||
|
36
src/cmph.c
36
src/cmph.c
@ -47,22 +47,22 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
|
||||
return *keylen;
|
||||
}
|
||||
|
||||
static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
|
||||
{
|
||||
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
|
||||
cmph_uint8 **keys_vd = (cmph_uint8 **)cmph_vector->vector;
|
||||
memcpy(keylen, keys_vd[cmph_vector->position], sizeof(*keylen));
|
||||
*key = (char *)malloc(*keylen);
|
||||
memcpy(*key, keys_vd[cmph_vector->position] + sizeof(*keylen), *keylen);
|
||||
cmph_vector->position = cmph_vector->position + 1;
|
||||
return *keylen;
|
||||
|
||||
}
|
||||
|
||||
static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
|
||||
{
|
||||
/*
|
||||
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
|
||||
char **keys_vd = (char **)cmph_vector->vector;
|
||||
|
||||
if (keys_vd + cmph_vector->position == NULL) return -1;
|
||||
*keylen = strlen(*(keys_vd + cmph_vector->position));
|
||||
*key = (char *)malloc(*keylen + 1);
|
||||
strcpy(*key, *(keys_vd + cmph_vector->position));
|
||||
cmph_vector->position = cmph_vector->position + 1;
|
||||
*/
|
||||
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
|
||||
char **keys_vd = (char **)cmph_vector->vector;
|
||||
|
||||
// if (keys_vd + cmph_vector->position == NULL) return -1;
|
||||
*keylen = strlen(keys_vd[cmph_vector->position]);
|
||||
*key = (char *)malloc(*keylen + 1);
|
||||
strcpy(*key, keys_vd[cmph_vector->position]);
|
||||
@ -165,6 +165,18 @@ static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source)
|
||||
free(key_source);
|
||||
}
|
||||
|
||||
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys)
|
||||
{
|
||||
cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
|
||||
key_source->read = key_byte_vector_read;
|
||||
key_source->dispose = key_vector_dispose;
|
||||
key_source->rewind = key_vector_rewind;
|
||||
return key_source;
|
||||
}
|
||||
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source)
|
||||
{
|
||||
cmph_io_vector_destroy(key_source);
|
||||
}
|
||||
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
|
||||
{
|
||||
cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
|
||||
|
@ -34,6 +34,9 @@ void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys);
|
||||
void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||
|
||||
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys);
|
||||
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source);
|
||||
|
||||
/** Hash configuration API **/
|
||||
cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);
|
||||
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||
|
Loading…
Reference in New Issue
Block a user