From 9553f655370a381a78a714d894e0504accd1dfd6 Mon Sep 17 00:00:00 2001 From: fc_botelho Date: Mon, 8 Aug 2005 01:00:27 +0000 Subject: [PATCH] temporary directory passed by command line --- src/brz.c | 201 +++++++++++++++++++++++----------------------- src/brz.h | 1 + src/brz_structs.h | 1 + src/cmph.c | 17 ++++ src/cmph.h | 1 + src/main.c | 14 +++- 6 files changed, 129 insertions(+), 106 deletions(-) diff --git a/src/brz.c b/src/brz.c index 6269d30..c69ad17 100755 --- a/src/brz.c +++ b/src/brz.c @@ -20,10 +20,7 @@ static int brz_gen_graphs(cmph_config_t *mph); static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n); -static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd); -static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 *memory_usage, cmph_uint32 memory_availability, FILE * graphs_fd); static char * brz_read_key(FILE * fd); -static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys); static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); @@ -40,6 +37,8 @@ brz_config_data_t *brz_config_new() brz->h1 = NULL; brz->h2 = NULL; brz->h3 = NULL; + brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8)); + strcpy(brz->tmp_dir, "/var/tmp/\0"); assert(brz); return brz; } @@ -48,6 +47,7 @@ void brz_config_destroy(cmph_config_t *mph) { brz_config_data_t *data = (brz_config_data_t *)mph->data; DEBUGP("Destroying algorithm dependent data\n"); + free(data->tmp_dir); free(data); } @@ -63,73 +63,94 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) ++i, ++hashptr; } } -static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source) + +void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) { - cmph_uint8 * hashtable = NULL; - cmph_uint32 i; - hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8)); - source->rewind(source->data); - memset(hashtable, 0, source->nkeys); - //check all keys - for (i = 0; i < source->nkeys; ++i) + brz_config_data_t *brz = (brz_config_data_t *)mph->data; + if(tmp_dir) { - cmph_uint32 h; - char *buf; - cmph_uint32 buflen = 0; - source->read(source->data, &buf, &buflen); - h = cmph_search(mphf, buf, buflen); - if(hashtable[h]) + cmph_uint32 len = strlen(tmp_dir); + free(brz->tmp_dir); + if(tmp_dir[len-1] != '/') { - fprintf(stderr, "collision: %u\n",h); - return 0; + brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8)); + sprintf(brz->tmp_dir, "%s/\0", tmp_dir); + } + else + { + brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8)); + sprintf(brz->tmp_dir, "%s\0", tmp_dir); } - //assert(hashtable[h]==0); - hashtable[h] = 1; - source->dispose(source->data, buf, buflen); - } - fprintf(stderr, "\n===============================================================================\n"); - free(hashtable); - return 1; -} - -static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source) -{ - cmph_uint8 * hashtable = NULL; - cmph_uint32 i; - hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8)); - source->rewind(source->data); - //memset(hashtable, 0, source->nkeys); - //check all keys - for (i = 0; i < source->nkeys; ++i) - { - cmph_uint32 h1_v; - cmph_uint32 h2_v; - cmph_uint32 h; - char *buf; - cmph_uint32 buflen = 0; - source->read(source->data, &buf, &buflen); - - h1_v = hash(h1, buf, buflen) % n; - - h2_v = hash(h2, buf, buflen) % n; - - if (h1_v == h2_v && ++h2_v >= n) h2_v = 0; - h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys; - - if(hashtable[h]) - { - fprintf(stderr, "collision: %u\n",h); - return 0; - } - //assert(hashtable[h]==0); - hashtable[h] = 1; - source->dispose(source->data, buf, buflen); - } - free(hashtable); - return 1; } +// static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source) +// { +// cmph_uint8 * hashtable = NULL; +// cmph_uint32 i; +// hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8)); +// source->rewind(source->data); +// memset(hashtable, 0, source->nkeys); +// //check all keys +// for (i = 0; i < source->nkeys; ++i) +// { +// cmph_uint32 h; +// char *buf; +// cmph_uint32 buflen = 0; +// source->read(source->data, &buf, &buflen); +// h = cmph_search(mphf, buf, buflen); +// if(hashtable[h]) +// { +// fprintf(stderr, "collision: %u\n",h); +// return 0; +// } +// //assert(hashtable[h]==0); +// hashtable[h] = 1; +// source->dispose(source->data, buf, buflen); +// } +// fprintf(stderr, "\n===============================================================================\n"); +// free(hashtable); +// return 1; +// } +// +// static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source) +// { +// cmph_uint8 * hashtable = NULL; +// cmph_uint32 i; +// hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8)); +// source->rewind(source->data); +// //memset(hashtable, 0, source->nkeys); +// //check all keys +// for (i = 0; i < source->nkeys; ++i) +// { +// cmph_uint32 h1_v; +// cmph_uint32 h2_v; +// cmph_uint32 h; +// char *buf; +// cmph_uint32 buflen = 0; +// source->read(source->data, &buf, &buflen); +// +// h1_v = hash(h1, buf, buflen) % n; +// +// h2_v = hash(h2, buf, buflen) % n; +// +// if (h1_v == h2_v && ++h2_v >= n) h2_v = 0; +// +// h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys; +// +// if(hashtable[h]) +// { +// fprintf(stderr, "collision: %u\n",h); +// return 0; +// } +// //assert(hashtable[h]==0); +// hashtable[h] = 1; +// source->dispose(source->data, buf, buflen); +// +// } +// free(hashtable); +// return 1; +// } cmph_t *brz_new(cmph_config_t *mph, float c) { @@ -238,7 +259,7 @@ static int brz_gen_graphs(cmph_config_t *mph) cmph_uint32 h3; FILE * tmp_fd = NULL; FILE ** tmp_fds = NULL; - char filename[100]; + char *filename = NULL; char *key = NULL; cmph_uint32 keylen; @@ -285,9 +306,11 @@ static int brz_gen_graphs(cmph_config_t *mph) memory_usage = memory_usage + keylen1 + 1; } // sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes); - sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes); -/* sprintf(filename, "%u.cmph",nflushes);*/ + filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); + sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); tmp_fd = fopen(filename, "wb"); + free(filename); + filename = NULL; for(i = 0; i < nkeys_in_buffer; i++) { keylen1 = strlen(buffer + keys_index[i]) + 1; @@ -345,9 +368,12 @@ static int brz_gen_graphs(cmph_config_t *mph) memory_usage = memory_usage + keylen1 + 1; } // sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes); - sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes); -/* sprintf(filename, "%u.cmph",nflushes);*/ +// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes); + filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); + sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes); tmp_fd = fopen(filename, "wb"); + free(filename); + filename = NULL; for(i = 0; i < nkeys_in_buffer; i++) { keylen1 = strlen(buffer + keys_index[i]) + 1; @@ -376,9 +402,12 @@ static int brz_gen_graphs(cmph_config_t *mph) for(i = 0; i < nflushes; i++) { // sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i); - sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i); -/* sprintf(filename, "%u.cmph",i);*/ +// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i); + filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); + sprintf(filename, "%s%u.cmph",brz->tmp_dir, i); tmp_fds[i] = fopen(filename, "rb"); + free(filename); + filename = NULL; key = brz_read_key(tmp_fds[i]); keylen = strlen(key); h3 = hash(brz->h3, key, keylen) % brz->k; @@ -474,23 +503,6 @@ static int brz_gen_graphs(cmph_config_t *mph) #pragma pack() } -static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd) -{ - fwrite(buffer, 1, *memory_usage, graphs_fd); - *memory_usage = 0; -} - -static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 * memory_usage, - cmph_uint32 memory_availability, FILE * graphs_fd) -{ - if(*memory_usage + keylen + 1 > memory_availability) - { - flush_buffer(buffer, memory_usage, graphs_fd); - } - memcpy(buffer + *memory_usage, key, keylen + 1); - *memory_usage = *memory_usage + keylen + 1; -} - static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n) { cmph_uint32 i, min_index = 0; @@ -521,21 +533,6 @@ static char * brz_read_key(FILE * fd) return buf; } -static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys) -{ - char ** keys_vd = (char **)malloc(sizeof(char *)*nkeys); - cmph_uint8 i; - - for(i = 0; i < nkeys; i++) - { - char * buf = brz_read_key(graphs_fd); - keys_vd[i] = (char *)malloc(strlen(buf) + 1); - strcpy(keys_vd[i], buf); - free(buf); - } - return keys_vd; -} - static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys) { cmph_uint8 i; diff --git a/src/brz.h b/src/brz.h index a1ed145..886eab2 100644 --- a/src/brz.h +++ b/src/brz.h @@ -8,6 +8,7 @@ typedef struct __brz_config_data_t brz_config_data_t; brz_config_data_t *brz_config_new(); void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); +void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir); void brz_config_destroy(cmph_config_t *mph); cmph_t *brz_new(cmph_config_t *mph, float c); diff --git a/src/brz_structs.h b/src/brz_structs.h index 742a425..f6af310 100755 --- a/src/brz_structs.h +++ b/src/brz_structs.h @@ -28,6 +28,7 @@ struct __brz_config_data_t hash_state_t **h1; hash_state_t **h2; hash_state_t * h3; + cmph_uint8 * tmp_dir; // temporary directory }; #endif diff --git a/src/cmph.c b/src/cmph.c index f7f4acb..203d60d 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -178,6 +178,23 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) mph->algo = algo; } +void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) +{ + switch (mph->algo) + { + case CMPH_CHM: + break; + case CMPH_BMZ: /* included -- Fabiano */ + break; + case CMPH_BRZ: /* included -- Fabiano */ + brz_config_set_tmp_dir(mph, tmp_dir); + break; + default: + assert(0); + } + +} + void cmph_config_destroy(cmph_config_t *mph) { DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]); diff --git a/src/cmph.h b/src/cmph.h index d30fb51..c301a91 100644 --- a/src/cmph.h +++ b/src/cmph.h @@ -35,6 +35,7 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity); void cmph_config_set_graphsize(cmph_config_t *mph, float c); void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo); +void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir); void cmph_config_destroy(cmph_config_t *mph); /** Hash API **/ diff --git a/src/main.c b/src/main.c index ed24a72..6416359 100644 --- a/src/main.c +++ b/src/main.c @@ -22,12 +22,12 @@ void usage(const char *prg) { - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg); } void usage_long(const char *prg) { cmph_uint32 i; - fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); + fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg); fprintf(stderr, "Minimum perfect hashing tool\n\n"); fprintf(stderr, " -h\t print this help message\n"); fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n"); @@ -41,6 +41,7 @@ void usage_long(const char *prg) fprintf(stderr, " -g\t generation mode\n"); fprintf(stderr, " -s\t random seed\n"); fprintf(stderr, " -m\t minimum perfect hash function file \n"); + fprintf(stderr, " -d\t temporary directory used in brz algorithm \n"); fprintf(stderr, " keysfile\t line separated file with keys\n"); } @@ -62,12 +63,12 @@ int main(int argc, char **argv) float c = 2.09; cmph_config_t *config = NULL; cmph_t *mphf = NULL; - + cmph_uint8 * tmp_dir = NULL; cmph_io_adapter_t *source; while (1) { - char ch = getopt(argc, argv, "hVvgc:k:a:f:m:s:"); + char ch = getopt(argc, argv, "hVvgc:k:a:f:m:d:s:"); if (ch == -1) break; switch (ch) { @@ -107,6 +108,9 @@ int main(int argc, char **argv) case 'm': mphf_file = strdup(optarg); break; + case 'd': + tmp_dir = strdup(optarg); + break; case 'v': ++verbosity; break; @@ -197,6 +201,7 @@ int main(int argc, char **argv) cmph_config_set_algo(config, mph_algo); if (nhashes) cmph_config_set_hashfuncs(config, hashes); cmph_config_set_verbosity(config, verbosity); + cmph_config_set_tmp_dir(config, tmp_dir); if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15; if (c != 0) cmph_config_set_graphsize(config, c); mphf = cmph_new(config); @@ -263,6 +268,7 @@ int main(int argc, char **argv) } fclose(keys_fd); free(mphf_file); + free(tmp_dir); free(source); return 0; }