strlen function removed from BRZ algorithm

This commit is contained in:
fc_botelho 2006-07-28 22:36:50 +00:00
parent 789e5d39b1
commit 0e8b3df922
9 changed files with 132 additions and 100 deletions

49
INSTALL
View File

@ -1,5 +1,8 @@
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
Foundation, Inc.
Installation Instructions
*************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free
Software Foundation, Inc.
This file is free documentation; the Free Software Foundation gives
unlimited permission to copy, distribute and modify it.
@ -67,9 +70,9 @@ The simplest way to compile this package is:
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that
the `configure' script does not know about. Run `./configure --help'
for details on some of the pertinent environment variables.
Some systems require unusual options for compilation or linking that the
`configure' script does not know about. Run `./configure --help' for
details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
@ -102,16 +105,16 @@ Installation Names
By default, `make install' will install the package's files in
`/usr/local/bin', `/usr/local/man', etc. You can specify an
installation prefix other than `/usr/local' by giving `configure' the
option `--prefix=PATH'.
option `--prefix=PREFIX'.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
give `configure' the option `--exec-prefix=PATH', the package will use
PATH as the prefix for installing programs and libraries.
give `configure' the option `--exec-prefix=PREFIX', the package will
use PREFIX as the prefix for installing programs and libraries.
Documentation and other data files will still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like `--bindir=PATH' to specify different values for particular
options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them.
@ -137,11 +140,11 @@ you can use the `configure' options `--x-includes=DIR' and
Specifying the System Type
==========================
There may be some features `configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on. Usually, assuming the package is built to be run on the
_same_ architectures, `configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
There may be some features `configure' cannot figure out automatically,
but needs to determine by the type of machine the package will run on.
Usually, assuming the package is built to be run on the _same_
architectures, `configure' can figure that out, but if it prints a
message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
@ -167,9 +170,9 @@ eventually be run) with `--host=TYPE'.
Sharing Defaults
================
If you want to set default values for `configure' scripts to share,
you can create a site shell script called `config.site' that gives
default values for variables like `CC', `cache_file', and `prefix'.
If you want to set default values for `configure' scripts to share, you
can create a site shell script called `config.site' that gives default
values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
@ -186,14 +189,18 @@ them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
will cause the specified gcc to be used as the C compiler (unless it is
overridden in the site shell script).
causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script). Here is another example:
/bin/bash ./configure CONFIG_SHELL=/bin/bash
Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent
configuration-related scripts to be executed by `/bin/bash'.
`configure' Invocation
======================
`configure' recognizes the following options to control how it
operates.
`configure' recognizes the following options to control how it operates.
`--help'
`-h'

View File

@ -142,7 +142,7 @@ cmph_t *bmz8_new(cmph_config_t *mph, float c)
used_edges = (cmph_uint8 *)malloc(bmz8->m/8 + 1);
memset(used_edges, 0, bmz8->m/8 + 1);
free(bmz8->g);
bmz8->g = (cmph_uint32 *)calloc(bmz8->n, sizeof(cmph_uint8));
bmz8->g = (cmph_uint8 *)calloc(bmz8->n, sizeof(cmph_uint8));
assert(bmz8->g);
for (i = 0; i < bmz8->n; ++i) // critical nodes
{

View File

@ -19,7 +19,7 @@
static int brz_gen_mphf(cmph_config_t *mph);
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys);
static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_uint32 *buflen);
brz_config_data_t *brz_config_new()
{
@ -209,7 +209,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_uint32 keylen;
cmph_uint32 cur_bucket = 0;
cmph_uint8 nkeys_vd = 0;
char ** keys_vd = NULL;
cmph_uint8 ** keys_vd = NULL;
mph->key_source->rewind(mph->key_source->data);
DEBUGP("Generating graphs from %u keys\n", brz->m);
@ -219,7 +219,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
mph->key_source->read(mph->key_source->data, &key, &keylen);
/* Buffers management */
if (memory_usage + keylen + 1 > brz->memory_availability) // flush buffers
if (memory_usage + keylen + sizeof(keylen) > brz->memory_availability) // flush buffers
{
if(mph->verbosity)
{
@ -241,11 +241,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + memory_usage));
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
keys_index[buckets_size[h0]] = memory_usage;
buckets_size[h0]++;
memory_usage = memory_usage + keylen1 + 1;
memory_usage += keylen1 + sizeof(keylen1);
}
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
@ -254,8 +254,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd);
}
nkeys_in_buffer = 0;
memory_usage = 0;
@ -264,9 +264,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
free(keys_index);
fclose(tmp_fd);
}
memcpy(buffer + memory_usage, key, keylen + 1);
memory_usage = memory_usage + keylen + 1;
memcpy(buffer + memory_usage, &keylen, sizeof(keylen));
memcpy(buffer + memory_usage + sizeof(keylen), key, keylen);
memory_usage += keylen + sizeof(keylen);
h0 = hash(brz->h0, key, keylen) % brz->k;
if ((brz->size[h0] == MAX_BUCKET_SIZE) || ((brz->c >= 1.0) && (cmph_uint8)(brz->c * brz->size[h0]) < brz->size[h0]))
{
free(buffer);
@ -278,7 +280,6 @@ static int brz_gen_mphf(cmph_config_t *mph)
nkeys_in_buffer++;
mph->key_source->dispose(mph->key_source->data, key, keylen);
}
if (memory_usage != 0) // flush buffers
{
if(mph->verbosity)
@ -300,11 +301,11 @@ static int brz_gen_mphf(cmph_config_t *mph)
keys_index = (cmph_uint32 *)calloc(nkeys_in_buffer, sizeof(cmph_uint32));
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + memory_usage));
h0 = hash(brz->h0, (char *)(buffer + memory_usage), keylen1) % brz->k;
memcpy(&keylen1, buffer + memory_usage, sizeof(keylen1));
h0 = hash(brz->h0, (char *)(buffer + memory_usage + sizeof(keylen1)), keylen1) % brz->k;
keys_index[buckets_size[h0]] = memory_usage;
buckets_size[h0]++;
memory_usage = memory_usage + keylen1 + 1;
memory_usage += keylen1 + sizeof(keylen1);
}
filename = (char *)calloc(strlen((char *)(brz->tmp_dir)) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
@ -313,8 +314,8 @@ static int brz_gen_mphf(cmph_config_t *mph)
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen((char *)(buffer + keys_index[i])) + 1;
fwrite(buffer + keys_index[i], 1, keylen1, tmp_fd);
memcpy(&keylen1, buffer + keys_index[i], sizeof(keylen1));
fwrite(buffer + keys_index[i], 1, keylen1 + sizeof(keylen1), tmp_fd);
}
nkeys_in_buffer = 0;
memory_usage = 0;
@ -352,50 +353,46 @@ static int brz_gen_mphf(cmph_config_t *mph)
buffer_manager_open(buff_manager, i, filename);
free(filename);
filename = NULL;
key = (char *)buffer_manager_read_key(buff_manager, i);
keylen = strlen(key);
h0 = hash(brz->h0, key, keylen) % brz->k;
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1);
free(key);
buffer_merge[i] = (cmph_uint8 *)key;
key = NULL; //transfer memory ownership
}
e = 0;
keys_vd = (char **)calloc(MAX_BUCKET_SIZE, sizeof(char *));
keys_vd = (cmph_uint8 **)calloc(MAX_BUCKET_SIZE, sizeof(cmph_uint8 *));
nkeys_vd = 0;
while(e < brz->m)
{
i = brz_min_index(buffer_h0, nflushes);
cur_bucket = buffer_h0[i];
key = (char *)buffer_manager_read_key(buff_manager, i);
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
if(key)
{
while(key)
{
keylen = strlen(key);
h0 = hash(brz->h0, key, keylen) % brz->k;
//keylen = strlen(key);
h0 = hash(brz->h0, key+sizeof(keylen), keylen) % brz->k;
if (h0 != buffer_h0[i]) break;
keys_vd[nkeys_vd++] = key;
keys_vd[nkeys_vd++] = (cmph_uint8 *)key;
key = NULL; //transfer memory ownership
e++;
key = (char *)buffer_manager_read_key(buff_manager, i);
key = (char *)buffer_manager_read_key(buff_manager, i, &keylen);
}
if (key)
{
assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
keys_vd[nkeys_vd++] = buffer_merge[i];
buffer_merge[i] = NULL; //transfer memory ownership
e++;
buffer_h0[i] = h0;
buffer_merge[i] = (cmph_uint8 *)calloc(keylen + 1, sizeof(cmph_uint8));
memcpy(buffer_merge[i], key, keylen + 1);
free(key);
buffer_merge[i] = (cmph_uint8 *)key;
}
}
if(!key)
{
assert(nkeys_vd < brz->size[cur_bucket]);
keys_vd[nkeys_vd++] = (char *)buffer_merge[i];
keys_vd[nkeys_vd++] = buffer_merge[i];
buffer_merge[i] = NULL; //transfer memory ownership
e++;
buffer_h0[i] = UINT_MAX;
@ -410,7 +407,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
char *bufmphf = NULL;
cmph_uint32 buflenmphf = 0;
// Source of keys
source = cmph_io_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
config = cmph_config_new(source);
cmph_config_set_algo(config, CMPH_BMZ8);
cmph_config_set_graphsize(config, brz->c);
@ -424,8 +421,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
cmph_config_destroy(config);
brz_destroy_keys_vd(keys_vd, nkeys_vd);
cmph_destroy(mphf_tmp);
cmph_io_vector_adapter_destroy(source);
cmph_io_byte_vector_adapter_destroy(source);
nkeys_vd = 0;
}
}
@ -447,7 +443,7 @@ static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
return min_index;
}
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
static void brz_destroy_keys_vd(cmph_uint8 ** keys_vd, cmph_uint8 nkeys)
{
cmph_uint8 i;
for(i = 0; i < nkeys; i++) { free(keys_vd[i]); keys_vd[i] = NULL;}
@ -465,7 +461,6 @@ static char * brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf,
hash_state_dump(bmzf->hashes[1], &bufh2, &buflenh2);
*buflen = buflenh1 + buflenh2 + n + 2*sizeof(cmph_uint32);
buf = (char *)malloc(*buflen);
//fprintf(stderr,"entrei passei\n");
memcpy(buf, &buflenh1, sizeof(cmph_uint32));
memcpy(buf+sizeof(cmph_uint32), bufh1, buflenh1);
memcpy(buf+sizeof(cmph_uint32)+buflenh1, &buflenh2, sizeof(cmph_uint32));

View File

@ -2,6 +2,7 @@
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
struct __buffer_entry_t
{
@ -51,24 +52,40 @@ void buffer_entry_load(buffer_entry_t * buffer_entry)
buffer_entry->pos = 0;
}
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry)
{
cmph_uint8 * buf = (cmph_uint8 *)malloc(BUFSIZ);
cmph_uint32 buf_pos = 0;
cmph_uint8 c;
while(1)
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen)
{
cmph_uint8 * buf = NULL;
cmph_uint32 lacked_bytes = sizeof(*keylen);
cmph_uint32 copied_bytes = 0;
if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end
{
free(buf);
return NULL;
}
if(buffer_entry->pos == buffer_entry->nbytes) buffer_entry_load(buffer_entry);
c = buffer_entry->buff[(buffer_entry->pos)++];
buf[buf_pos++] = c;
if(c == '\0') break;
if(buf_pos % BUFSIZ == 0) buf = (cmph_uint8 *)realloc(buf, buf_pos + BUFSIZ);
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
{
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
if (copied_bytes != 0) memcpy(keylen, buffer_entry->buff + buffer_entry->pos, copied_bytes);
buffer_entry_load(buffer_entry);
}
memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes);
buffer_entry->pos += lacked_bytes;
lacked_bytes = *keylen;
copied_bytes = 0;
buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
memcpy(buf, keylen, sizeof(*keylen));
if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) {
copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
if (copied_bytes != 0) {
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, copied_bytes);
}
buffer_entry_load(buffer_entry);
}
memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, lacked_bytes);
buffer_entry->pos += lacked_bytes;
return buf;
}

View File

@ -9,6 +9,6 @@ buffer_entry_t * buffer_entry_new(cmph_uint32 capacity);
void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity);
cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry);
void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename);
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry);
cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen);
void buffer_entry_destroy(buffer_entry_t * buffer_entry);
#endif

View File

@ -35,20 +35,18 @@ void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, c
buffer_entry_open(buffer_manager->buffer_entries[index], filename);
}
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index)
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen)
{
cmph_uint8 * key = NULL;
if (buffer_manager->pos_avail_list >= 0 ) // recovering memory
{
cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]) + buffer_manager->memory_avail_list[(buffer_manager->pos_avail_list)--];
buffer_entry_set_capacity(buffer_manager->buffer_entries[index], new_capacity);
//fprintf(stderr, "recovering memory\n");
}
key = buffer_entry_read_key(buffer_manager->buffer_entries[index]);
key = buffer_entry_read_key(buffer_manager->buffer_entries[index], keylen);
if (key == NULL) // storing memory to be recovered
{
buffer_manager->memory_avail_list[++(buffer_manager->pos_avail_list)] = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]);
//fprintf(stderr, "storing memory to be recovered\n");
}
return key;
}

View File

@ -7,6 +7,6 @@ typedef struct __buffer_manager_t buffer_manager_t;
buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries);
void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename);
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index);
cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen);
void buffer_manager_destroy(buffer_manager_t * buffer_manager);
#endif

View File

@ -47,22 +47,22 @@ static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
return *keylen;
}
/**
 * Read the next key from a vector of length-prefixed byte keys.
 *
 * Each vector entry is read as a cmph_uint32 length immediately followed by
 * that many key bytes. The length is copied into *keylen, the key bytes are
 * copied into a freshly malloc'ed buffer stored in *key (no terminator is
 * appended; this function does not free it), and the vector position is
 * advanced by one.
 *
 * @param data   pointer to the cmph_vector_t holding the entries and position
 * @param key    receives the newly allocated copy of the key bytes
 * @param keylen receives the key length in bytes
 * @return the key length, or -1 if the key buffer could not be allocated
 *         (in which case the position is not advanced)
 */
static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
	cmph_uint8 **keys_vd = (cmph_uint8 **)cmph_vector->vector;
	const cmph_uint8 *entry = keys_vd[cmph_vector->position];

	// The length prefix is copied with memcpy because the entry is a raw byte buffer.
	memcpy(keylen, entry, sizeof(*keylen));
	*key = (char *)malloc(*keylen);
	// malloc(0) may legitimately return NULL, so only a non-empty key counts as a failure.
	if (*key == NULL && *keylen != 0) return -1;
	if (*keylen != 0) memcpy(*key, entry + sizeof(*keylen), *keylen);
	cmph_vector->position = cmph_vector->position + 1;
	return *keylen;
}
static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
/*
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
char **keys_vd = (char **)cmph_vector->vector;
if (keys_vd + cmph_vector->position == NULL) return -1;
*keylen = strlen(*(keys_vd + cmph_vector->position));
*key = (char *)malloc(*keylen + 1);
strcpy(*key, *(keys_vd + cmph_vector->position));
cmph_vector->position = cmph_vector->position + 1;
*/
cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
char **keys_vd = (char **)cmph_vector->vector;
// if (keys_vd + cmph_vector->position == NULL) return -1;
*keylen = strlen(keys_vd[cmph_vector->position]);
*key = (char *)malloc(*keylen + 1);
strcpy(*key, keys_vd[cmph_vector->position]);
@ -165,6 +165,18 @@ static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source)
free(key_source);
}
/**
 * Build a key source adapter over a vector of byte keys.
 *
 * The adapter is created by cmph_io_vector_new and then wired to
 * key_byte_vector_read for reading, and to the vector dispose/rewind
 * callbacks.
 *
 * @param vector array of key entries handed to cmph_io_vector_new
 * @param nkeys  number of entries in vector
 * @return the configured adapter
 */
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys)
{
	cmph_io_adapter_t *adapter = cmph_io_vector_new(vector, nkeys);

	adapter->rewind = key_vector_rewind;
	adapter->dispose = key_vector_dispose;
	adapter->read = key_byte_vector_read;

	return adapter;
}
/**
 * Destroy a byte-vector key source adapter.
 * Simply delegates to cmph_io_vector_destroy.
 *
 * @param key_source adapter to destroy
 */
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
cmph_io_vector_destroy(key_source);
}
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
{
cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);

View File

@ -34,6 +34,9 @@ void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source);
cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys);
void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source);
cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys);
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source);
/** Hash configuration API **/
cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source);
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);