1
Fork 0

temporary directory passed by command line

This commit is contained in:
fc_botelho 2005-08-08 01:00:27 +00:00
parent 114c4e1c63
commit 987870bc59
6 changed files with 129 additions and 106 deletions

201
src/brz.c
View File

@ -20,10 +20,7 @@
static int brz_gen_graphs(cmph_config_t *mph);
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd);
static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 *memory_usage, cmph_uint32 memory_availability, FILE * graphs_fd);
static char * brz_read_key(FILE * fd);
static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source);
@ -40,6 +37,8 @@ brz_config_data_t *brz_config_new()
brz->h1 = NULL;
brz->h2 = NULL;
brz->h3 = NULL;
brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8));
strcpy(brz->tmp_dir, "/var/tmp/\0");
assert(brz);
return brz;
}
@ -48,6 +47,7 @@ void brz_config_destroy(cmph_config_t *mph)
{
brz_config_data_t *data = (brz_config_data_t *)mph->data;
DEBUGP("Destroying algorithm dependent data\n");
free(data->tmp_dir);
free(data);
}
@ -63,73 +63,94 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
++i, ++hashptr;
}
}
static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
cmph_uint8 * hashtable = NULL;
cmph_uint32 i;
hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
source->rewind(source->data);
memset(hashtable, 0, source->nkeys);
//check all keys
for (i = 0; i < source->nkeys; ++i)
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
if(tmp_dir)
{
cmph_uint32 h;
char *buf;
cmph_uint32 buflen = 0;
source->read(source->data, &buf, &buflen);
h = cmph_search(mphf, buf, buflen);
if(hashtable[h])
cmph_uint32 len = strlen(tmp_dir);
free(brz->tmp_dir);
if(tmp_dir[len-1] != '/')
{
fprintf(stderr, "collision: %u\n",h);
return 0;
brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8));
sprintf(brz->tmp_dir, "%s/\0", tmp_dir);
}
else
{
brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8));
sprintf(brz->tmp_dir, "%s\0", tmp_dir);
}
//assert(hashtable[h]==0);
hashtable[h] = 1;
source->dispose(source->data, buf, buflen);
}
fprintf(stderr, "\n===============================================================================\n");
free(hashtable);
return 1;
}
/*
 * Check that the function described by (h1, h2, g) sends every key of
 * source to a distinct slot.
 *
 * For each of the source->nkeys keys, both hashes are reduced modulo n; if
 * they coincide, the second one is moved to the next position (wrapping to 0
 * at n).  The slot is (g[h1_v] + g[h2_v]) % source->nkeys.
 *
 * Returns 1 when no slot is hit twice, 0 on the first collision (which is
 * also reported on stderr).  The scratch table and the key being examined
 * are released on every path, including the collision path.
 */
static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
{
	cmph_uint8 * hashtable = NULL;
	cmph_uint8 ok = 1;
	cmph_uint32 i;
	/* One zero-initialised "seen" flag per slot. */
	hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
	source->rewind(source->data);
	//check all keys
	for (i = 0; i < source->nkeys; ++i)
	{
		cmph_uint32 h1_v;
		cmph_uint32 h2_v;
		cmph_uint32 h;
		char *buf;
		cmph_uint32 buflen = 0;
		source->read(source->data, &buf, &buflen);
		h1_v = hash(h1, buf, buflen) % n;
		h2_v = hash(h2, buf, buflen) % n;
		/* Keep the two positions distinct; wrap around at n. */
		if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;
		h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;
		if(hashtable[h])
		{
			fprintf(stderr, "collision: %u\n",h);
			ok = 0;
		}
		else
		{
			hashtable[h] = 1;
		}
		/* Hand the key back before deciding whether to stop, so it is never leaked. */
		source->dispose(source->data, buf, buflen);
		if (!ok) break;
	}
	free(hashtable);
	return ok;
}
// static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
// {
// cmph_uint8 * hashtable = NULL;
// cmph_uint32 i;
// hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
// source->rewind(source->data);
// memset(hashtable, 0, source->nkeys);
// //check all keys
// for (i = 0; i < source->nkeys; ++i)
// {
// cmph_uint32 h;
// char *buf;
// cmph_uint32 buflen = 0;
// source->read(source->data, &buf, &buflen);
// h = cmph_search(mphf, buf, buflen);
// if(hashtable[h])
// {
// fprintf(stderr, "collision: %u\n",h);
// return 0;
// }
// //assert(hashtable[h]==0);
// hashtable[h] = 1;
// source->dispose(source->data, buf, buflen);
// }
// fprintf(stderr, "\n===============================================================================\n");
// free(hashtable);
// return 1;
// }
//
// static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
// {
// cmph_uint8 * hashtable = NULL;
// cmph_uint32 i;
// hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
// source->rewind(source->data);
// //memset(hashtable, 0, source->nkeys);
// //check all keys
// for (i = 0; i < source->nkeys; ++i)
// {
// cmph_uint32 h1_v;
// cmph_uint32 h2_v;
// cmph_uint32 h;
// char *buf;
// cmph_uint32 buflen = 0;
// source->read(source->data, &buf, &buflen);
//
// h1_v = hash(h1, buf, buflen) % n;
//
// h2_v = hash(h2, buf, buflen) % n;
//
// if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;
//
// h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;
//
// if(hashtable[h])
// {
// fprintf(stderr, "collision: %u\n",h);
// return 0;
// }
// //assert(hashtable[h]==0);
// hashtable[h] = 1;
// source->dispose(source->data, buf, buflen);
//
// }
// free(hashtable);
// return 1;
// }
cmph_t *brz_new(cmph_config_t *mph, float c)
{
@ -238,7 +259,7 @@ static int brz_gen_graphs(cmph_config_t *mph)
cmph_uint32 h3;
FILE * tmp_fd = NULL;
FILE ** tmp_fds = NULL;
char filename[100];
char *filename = NULL;
char *key = NULL;
cmph_uint32 keylen;
@ -285,9 +306,11 @@ static int brz_gen_graphs(cmph_config_t *mph)
memory_usage = memory_usage + keylen1 + 1;
}
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
/* sprintf(filename, "%u.cmph",nflushes);*/
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb");
free(filename);
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen(buffer + keys_index[i]) + 1;
@ -345,9 +368,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
memory_usage = memory_usage + keylen1 + 1;
}
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
/* sprintf(filename, "%u.cmph",nflushes);*/
// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb");
free(filename);
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++)
{
keylen1 = strlen(buffer + keys_index[i]) + 1;
@ -376,9 +402,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
for(i = 0; i < nflushes; i++)
{
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i);
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i);
/* sprintf(filename, "%u.cmph",i);*/
// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i);
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
tmp_fds[i] = fopen(filename, "rb");
free(filename);
filename = NULL;
key = brz_read_key(tmp_fds[i]);
keylen = strlen(key);
h3 = hash(brz->h3, key, keylen) % brz->k;
@ -474,23 +503,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
#pragma pack()
}
/*
 * Write the first *memory_usage bytes of buffer to graphs_fd and reset
 * *memory_usage to 0.
 *
 * A short write is reported on stderr instead of being silently ignored.
 * The counter is reset either way, as before.
 */
static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd)
{
	size_t pending = *memory_usage;
	size_t written = fwrite(buffer, 1, pending, graphs_fd);
	if (written != pending)
	{
		fprintf(stderr, "flush_buffer: short write (%lu of %lu bytes)\n",
		        (unsigned long)written, (unsigned long)pending);
	}
	*memory_usage = 0;
}
/*
 * Append key (keylen bytes plus one trailing byte) to buffer at offset
 * *memory_usage and advance *memory_usage accordingly.
 *
 * If the appended bytes would take *memory_usage past memory_availability,
 * the buffer is first handed to flush_buffer with graphs_fd.
 * NOTE(review): presumably buffer holds memory_availability bytes and the
 * extra byte is the key's NUL terminator -- confirm against the caller.
 */
static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 * memory_usage,
cmph_uint32 memory_availability, FILE * graphs_fd)
{
	const cmph_uint32 needed = keylen + 1;
	const int fits = !(*memory_usage + needed > memory_availability);

	if (!fits)
	{
		flush_buffer(buffer, memory_usage, graphs_fd);
	}

	/* Copy at the current write offset (re-read here, since a flush changes it). */
	memcpy(&buffer[*memory_usage], key, needed);
	*memory_usage += needed;
}
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
{
cmph_uint32 i, min_index = 0;
@ -521,21 +533,6 @@ static char * brz_read_key(FILE * fd)
return buf;
}
/*
 * Read nkeys keys from graphs_fd with brz_read_key and return them as a
 * malloc'd array of nkeys strings.
 *
 * Each key is copied into its own malloc'd buffer sized to the string, and
 * the buffer obtained from brz_read_key is freed afterwards.
 */
static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys)
{
	char ** result = (char **)malloc(sizeof(char *)*nkeys);
	cmph_uint8 pos = 0;
	while (pos < nkeys)
	{
		char * raw = brz_read_key(graphs_fd);
		size_t bytes = strlen(raw) + 1;	/* include the terminator */
		result[pos] = (char *)malloc(bytes);
		memcpy(result[pos], raw, bytes);
		free(raw);
		pos++;
	}
	return result;
}
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
{
cmph_uint8 i;

View File

@ -8,6 +8,7 @@ typedef struct __brz_config_data_t brz_config_data_t;
brz_config_data_t *brz_config_new();
void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void brz_config_destroy(cmph_config_t *mph);
cmph_t *brz_new(cmph_config_t *mph, float c);

View File

@ -28,6 +28,7 @@ struct __brz_config_data_t
hash_state_t **h1;
hash_state_t **h2;
hash_state_t * h3;
cmph_uint8 * tmp_dir; // temporary directory
};
#endif

View File

@ -178,6 +178,23 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
mph->algo = algo;
}
/*
 * Forward tmp_dir to the algorithm selected in mph->algo.
 *
 * Only CMPH_BRZ acts on it (through brz_config_set_tmp_dir).  The call is a
 * no-op for CMPH_CHM and CMPH_BMZ, and any other value trips assert(0).
 */
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
	const int is_brz = (mph->algo == CMPH_BRZ);
	const int is_ignored = (mph->algo == CMPH_CHM || mph->algo == CMPH_BMZ);

	if (is_brz)
	{
		brz_config_set_tmp_dir(mph, tmp_dir);
	}
	else if (!is_ignored)
	{
		assert(0);
	}
}
void cmph_config_destroy(cmph_config_t *mph)
{
DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);

View File

@ -35,6 +35,7 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity);
void cmph_config_set_graphsize(cmph_config_t *mph, float c);
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void cmph_config_destroy(cmph_config_t *mph);
/** Hash API **/

View File

@ -22,12 +22,12 @@
/*
 * Print the one-line command synopsis to stderr.
 * prg: program name substituted into the message.
 * NOTE(review): two synopsis lines are visible here, one without and one
 * with [-d tmp_dir]; presumably only the latter is meant to remain -- confirm.
 */
void usage(const char *prg)
{
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg);
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
}
void usage_long(const char *prg)
{
cmph_uint32 i;
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg);
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, " -h\t print this help message\n");
fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n");
@ -41,6 +41,7 @@ void usage_long(const char *prg)
fprintf(stderr, " -g\t generation mode\n");
fprintf(stderr, " -s\t random seed\n");
fprintf(stderr, " -m\t minimum perfect hash function file \n");
fprintf(stderr, " -d\t temporary directory used in brz algorithm \n");
fprintf(stderr, " keysfile\t line separated file with keys\n");
}
@ -62,12 +63,12 @@ int main(int argc, char **argv)
float c = 2.09;
cmph_config_t *config = NULL;
cmph_t *mphf = NULL;
cmph_uint8 * tmp_dir = NULL;
cmph_io_adapter_t *source;
while (1)
{
char ch = getopt(argc, argv, "hVvgc:k:a:f:m:s:");
char ch = getopt(argc, argv, "hVvgc:k:a:f:m:d:s:");
if (ch == -1) break;
switch (ch)
{
@ -107,6 +108,9 @@ int main(int argc, char **argv)
case 'm':
mphf_file = strdup(optarg);
break;
case 'd':
tmp_dir = strdup(optarg);
break;
case 'v':
++verbosity;
break;
@ -197,6 +201,7 @@ int main(int argc, char **argv)
cmph_config_set_algo(config, mph_algo);
if (nhashes) cmph_config_set_hashfuncs(config, hashes);
cmph_config_set_verbosity(config, verbosity);
cmph_config_set_tmp_dir(config, tmp_dir);
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c);
mphf = cmph_new(config);
@ -263,6 +268,7 @@ int main(int argc, char **argv)
}
fclose(keys_fd);
free(mphf_file);
free(tmp_dir);
free(source);
return 0;
}