temporary directory passed by command line

This commit is contained in:
fc_botelho 2005-08-08 01:00:27 +00:00
parent da4ca77b9c
commit 9553f65537
6 changed files with 129 additions and 106 deletions

197
src/brz.c
View File

@ -20,10 +20,7 @@
static int brz_gen_graphs(cmph_config_t *mph); static int brz_gen_graphs(cmph_config_t *mph);
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n); static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd);
static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 *memory_usage, cmph_uint32 memory_availability, FILE * graphs_fd);
static char * brz_read_key(FILE * fd); static char * brz_read_key(FILE * fd);
static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source);
@ -40,6 +37,8 @@ brz_config_data_t *brz_config_new()
brz->h1 = NULL; brz->h1 = NULL;
brz->h2 = NULL; brz->h2 = NULL;
brz->h3 = NULL; brz->h3 = NULL;
brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8));
strcpy(brz->tmp_dir, "/var/tmp/\0");
assert(brz); assert(brz);
return brz; return brz;
} }
@ -48,6 +47,7 @@ void brz_config_destroy(cmph_config_t *mph)
{ {
brz_config_data_t *data = (brz_config_data_t *)mph->data; brz_config_data_t *data = (brz_config_data_t *)mph->data;
DEBUGP("Destroying algorithm dependent data\n"); DEBUGP("Destroying algorithm dependent data\n");
free(data->tmp_dir);
free(data); free(data);
} }
@ -63,73 +63,94 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
++i, ++hashptr; ++i, ++hashptr;
} }
} }
static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{ {
cmph_uint8 * hashtable = NULL; brz_config_data_t *brz = (brz_config_data_t *)mph->data;
cmph_uint32 i; if(tmp_dir)
hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
source->rewind(source->data);
memset(hashtable, 0, source->nkeys);
//check all keys
for (i = 0; i < source->nkeys; ++i)
{ {
cmph_uint32 h; cmph_uint32 len = strlen(tmp_dir);
char *buf; free(brz->tmp_dir);
cmph_uint32 buflen = 0; if(tmp_dir[len-1] != '/')
source->read(source->data, &buf, &buflen);
h = cmph_search(mphf, buf, buflen);
if(hashtable[h])
{ {
fprintf(stderr, "collision: %u\n",h); brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8));
return 0; sprintf(brz->tmp_dir, "%s/\0", tmp_dir);
} }
//assert(hashtable[h]==0); else
hashtable[h] = 1;
source->dispose(source->data, buf, buflen);
}
fprintf(stderr, "\n===============================================================================\n");
free(hashtable);
return 1;
}
static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
{
cmph_uint8 * hashtable = NULL;
cmph_uint32 i;
hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
source->rewind(source->data);
//memset(hashtable, 0, source->nkeys);
//check all keys
for (i = 0; i < source->nkeys; ++i)
{
cmph_uint32 h1_v;
cmph_uint32 h2_v;
cmph_uint32 h;
char *buf;
cmph_uint32 buflen = 0;
source->read(source->data, &buf, &buflen);
h1_v = hash(h1, buf, buflen) % n;
h2_v = hash(h2, buf, buflen) % n;
if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;
h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;
if(hashtable[h])
{ {
fprintf(stderr, "collision: %u\n",h); brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8));
return 0; sprintf(brz->tmp_dir, "%s\0", tmp_dir);
} }
//assert(hashtable[h]==0);
hashtable[h] = 1;
source->dispose(source->data, buf, buflen);
} }
free(hashtable);
return 1;
} }
// static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
// {
// cmph_uint8 * hashtable = NULL;
// cmph_uint32 i;
// hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
// source->rewind(source->data);
// memset(hashtable, 0, source->nkeys);
// //check all keys
// for (i = 0; i < source->nkeys; ++i)
// {
// cmph_uint32 h;
// char *buf;
// cmph_uint32 buflen = 0;
// source->read(source->data, &buf, &buflen);
// h = cmph_search(mphf, buf, buflen);
// if(hashtable[h])
// {
// fprintf(stderr, "collision: %u\n",h);
// return 0;
// }
// //assert(hashtable[h]==0);
// hashtable[h] = 1;
// source->dispose(source->data, buf, buflen);
// }
// fprintf(stderr, "\n===============================================================================\n");
// free(hashtable);
// return 1;
// }
//
// static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
// {
// cmph_uint8 * hashtable = NULL;
// cmph_uint32 i;
// hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
// source->rewind(source->data);
// //memset(hashtable, 0, source->nkeys);
// //check all keys
// for (i = 0; i < source->nkeys; ++i)
// {
// cmph_uint32 h1_v;
// cmph_uint32 h2_v;
// cmph_uint32 h;
// char *buf;
// cmph_uint32 buflen = 0;
// source->read(source->data, &buf, &buflen);
//
// h1_v = hash(h1, buf, buflen) % n;
//
// h2_v = hash(h2, buf, buflen) % n;
//
// if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;
//
// h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;
//
// if(hashtable[h])
// {
// fprintf(stderr, "collision: %u\n",h);
// return 0;
// }
// //assert(hashtable[h]==0);
// hashtable[h] = 1;
// source->dispose(source->data, buf, buflen);
//
// }
// free(hashtable);
// return 1;
// }
cmph_t *brz_new(cmph_config_t *mph, float c) cmph_t *brz_new(cmph_config_t *mph, float c)
{ {
@ -238,7 +259,7 @@ static int brz_gen_graphs(cmph_config_t *mph)
cmph_uint32 h3; cmph_uint32 h3;
FILE * tmp_fd = NULL; FILE * tmp_fd = NULL;
FILE ** tmp_fds = NULL; FILE ** tmp_fds = NULL;
char filename[100]; char *filename = NULL;
char *key = NULL; char *key = NULL;
cmph_uint32 keylen; cmph_uint32 keylen;
@ -285,9 +306,11 @@ static int brz_gen_graphs(cmph_config_t *mph)
memory_usage = memory_usage + keylen1 + 1; memory_usage = memory_usage + keylen1 + 1;
} }
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes); // sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
/* sprintf(filename, "%u.cmph",nflushes);*/ sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb"); tmp_fd = fopen(filename, "wb");
free(filename);
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++) for(i = 0; i < nkeys_in_buffer; i++)
{ {
keylen1 = strlen(buffer + keys_index[i]) + 1; keylen1 = strlen(buffer + keys_index[i]) + 1;
@ -345,9 +368,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
memory_usage = memory_usage + keylen1 + 1; memory_usage = memory_usage + keylen1 + 1;
} }
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes); // sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes); // sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
/* sprintf(filename, "%u.cmph",nflushes);*/ filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
tmp_fd = fopen(filename, "wb"); tmp_fd = fopen(filename, "wb");
free(filename);
filename = NULL;
for(i = 0; i < nkeys_in_buffer; i++) for(i = 0; i < nkeys_in_buffer; i++)
{ {
keylen1 = strlen(buffer + keys_index[i]) + 1; keylen1 = strlen(buffer + keys_index[i]) + 1;
@ -376,9 +402,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
for(i = 0; i < nflushes; i++) for(i = 0; i < nflushes; i++)
{ {
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i); // sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i);
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i); // sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i);
/* sprintf(filename, "%u.cmph",i);*/ filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
tmp_fds[i] = fopen(filename, "rb"); tmp_fds[i] = fopen(filename, "rb");
free(filename);
filename = NULL;
key = brz_read_key(tmp_fds[i]); key = brz_read_key(tmp_fds[i]);
keylen = strlen(key); keylen = strlen(key);
h3 = hash(brz->h3, key, keylen) % brz->k; h3 = hash(brz->h3, key, keylen) % brz->k;
@ -474,23 +503,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
#pragma pack() #pragma pack()
} }
/*
 * Write the first *memory_usage bytes of buffer to graphs_fd and then
 * reset *memory_usage to 0 so the caller can start refilling the buffer.
 *
 * buffer        bytes to be written
 * memory_usage  in: number of bytes pending in buffer; out: always 0
 * graphs_fd     destination stream
 *
 * A short write is reported on stderr. The counter is reset even in that
 * case, exactly as before, so callers never see a "full" buffer after a
 * flush.
 */
static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd)
{
	size_t pending = *memory_usage;

	if (pending > 0)
	{
		size_t written = fwrite(buffer, 1, pending, graphs_fd);
		if (written != pending)
		{
			fprintf(stderr, "flush_buffer: short write (%lu of %lu bytes)\n",
				(unsigned long)written, (unsigned long)pending);
		}
	}
	*memory_usage = 0;
}
/*
 * Append one record (keylen + 1 bytes starting at key) to buffer, flushing
 * the buffer to graphs_fd first when the record would not fit.
 *
 * buffer               staging area; NOTE(review): assumed to hold at least
 *                      memory_availability bytes - confirm at the call site
 * key                  record data; keylen + 1 bytes are read from it
 *                      (presumably the key plus its terminating NUL)
 * keylen               record length, excluding the trailing byte
 * memory_usage         in/out: number of bytes currently staged in buffer
 * memory_availability  capacity threshold used to decide when to flush
 * graphs_fd            stream that receives flushed data
 *
 * A record that is larger than memory_availability on its own can never be
 * staged; it is written straight to graphs_fd (after the flush, so output
 * order is preserved) instead of being copied past the threshold.
 */
static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 * memory_usage,
		cmph_uint32 memory_availability, FILE * graphs_fd)
{
	cmph_uint32 record_len = keylen + 1;

	if(*memory_usage + record_len > memory_availability)
	{
		flush_buffer(buffer, memory_usage, graphs_fd);
	}
	if(record_len > memory_availability)
	{
		/* Does not fit even in an empty buffer: bypass the staging area. */
		if(fwrite(key, 1, record_len, graphs_fd) != record_len)
		{
			fprintf(stderr, "save_in_disk: short write of oversized key\n");
		}
		return;
	}
	memcpy(buffer + *memory_usage, key, record_len);
	*memory_usage = *memory_usage + record_len;
}
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n) static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
{ {
cmph_uint32 i, min_index = 0; cmph_uint32 i, min_index = 0;
@ -521,21 +533,6 @@ static char * brz_read_key(FILE * fd)
return buf; return buf;
} }
/*
 * Read nkeys keys from graphs_fd via brz_read_key() and return them as a
 * freshly allocated array of freshly allocated strings.
 *
 * Each string returned by brz_read_key() is duplicated into a block sized
 * to its length and the original is freed. The caller owns the array and
 * every element in it.
 */
static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys)
{
	char **table = (char **)malloc(sizeof(char *) * nkeys);
	cmph_uint8 idx = 0;

	while (idx < nkeys)
	{
		char *raw = brz_read_key(graphs_fd);
		size_t nbytes = strlen(raw) + 1;   /* include the terminator */

		table[idx] = (char *)malloc(nbytes);
		memcpy(table[idx], raw, nbytes);
		free(raw);
		++idx;
	}
	return table;
}
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys) static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
{ {
cmph_uint8 i; cmph_uint8 i;

View File

@ -8,6 +8,7 @@ typedef struct __brz_config_data_t brz_config_data_t;
brz_config_data_t *brz_config_new(); brz_config_data_t *brz_config_new();
void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void brz_config_destroy(cmph_config_t *mph); void brz_config_destroy(cmph_config_t *mph);
cmph_t *brz_new(cmph_config_t *mph, float c); cmph_t *brz_new(cmph_config_t *mph, float c);

View File

@ -28,6 +28,7 @@ struct __brz_config_data_t
hash_state_t **h1; hash_state_t **h1;
hash_state_t **h2; hash_state_t **h2;
hash_state_t * h3; hash_state_t * h3;
cmph_uint8 * tmp_dir; // temporary directory
}; };
#endif #endif

View File

@ -178,6 +178,23 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
mph->algo = algo; mph->algo = algo;
} }
/*
 * Forward the temporary-directory setting to the algorithm selected in
 * mph->algo.
 *
 * Only CMPH_BRZ acts on it (via brz_config_set_tmp_dir); CMPH_CHM and
 * CMPH_BMZ accept the call and ignore the value. Any other algorithm value
 * trips the assertion.
 */
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
	if (mph->algo == CMPH_BRZ)
	{
		brz_config_set_tmp_dir(mph, tmp_dir);
		return;
	}
	if (mph->algo == CMPH_CHM || mph->algo == CMPH_BMZ)
	{
		/* nothing to configure for these algorithms */
		return;
	}
	assert(0);
}
void cmph_config_destroy(cmph_config_t *mph) void cmph_config_destroy(cmph_config_t *mph)
{ {
DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]); DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);

View File

@ -35,6 +35,7 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity); void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity);
void cmph_config_set_graphsize(cmph_config_t *mph, float c); void cmph_config_set_graphsize(cmph_config_t *mph, float c);
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo); void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
void cmph_config_destroy(cmph_config_t *mph); void cmph_config_destroy(cmph_config_t *mph);
/** Hash API **/ /** Hash API **/

View File

@ -22,12 +22,12 @@
void usage(const char *prg) void usage(const char *prg)
{ {
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
} }
void usage_long(const char *prg) void usage_long(const char *prg)
{ {
cmph_uint32 i; cmph_uint32 i;
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg); fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
fprintf(stderr, "Minimum perfect hashing tool\n\n"); fprintf(stderr, "Minimum perfect hashing tool\n\n");
fprintf(stderr, " -h\t print this help message\n"); fprintf(stderr, " -h\t print this help message\n");
fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n"); fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n");
@ -41,6 +41,7 @@ void usage_long(const char *prg)
fprintf(stderr, " -g\t generation mode\n"); fprintf(stderr, " -g\t generation mode\n");
fprintf(stderr, " -s\t random seed\n"); fprintf(stderr, " -s\t random seed\n");
fprintf(stderr, " -m\t minimum perfect hash function file \n"); fprintf(stderr, " -m\t minimum perfect hash function file \n");
fprintf(stderr, " -d\t temporary directory used in brz algorithm \n");
fprintf(stderr, " keysfile\t line separated file with keys\n"); fprintf(stderr, " keysfile\t line separated file with keys\n");
} }
@ -62,12 +63,12 @@ int main(int argc, char **argv)
float c = 2.09; float c = 2.09;
cmph_config_t *config = NULL; cmph_config_t *config = NULL;
cmph_t *mphf = NULL; cmph_t *mphf = NULL;
cmph_uint8 * tmp_dir = NULL;
cmph_io_adapter_t *source; cmph_io_adapter_t *source;
while (1) while (1)
{ {
char ch = getopt(argc, argv, "hVvgc:k:a:f:m:s:"); char ch = getopt(argc, argv, "hVvgc:k:a:f:m:d:s:");
if (ch == -1) break; if (ch == -1) break;
switch (ch) switch (ch)
{ {
@ -107,6 +108,9 @@ int main(int argc, char **argv)
case 'm': case 'm':
mphf_file = strdup(optarg); mphf_file = strdup(optarg);
break; break;
case 'd':
tmp_dir = strdup(optarg);
break;
case 'v': case 'v':
++verbosity; ++verbosity;
break; break;
@ -197,6 +201,7 @@ int main(int argc, char **argv)
cmph_config_set_algo(config, mph_algo); cmph_config_set_algo(config, mph_algo);
if (nhashes) cmph_config_set_hashfuncs(config, hashes); if (nhashes) cmph_config_set_hashfuncs(config, hashes);
cmph_config_set_verbosity(config, verbosity); cmph_config_set_verbosity(config, verbosity);
cmph_config_set_tmp_dir(config, tmp_dir);
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15; if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
if (c != 0) cmph_config_set_graphsize(config, c); if (c != 0) cmph_config_set_graphsize(config, c);
mphf = cmph_new(config); mphf = cmph_new(config);
@ -263,6 +268,7 @@ int main(int argc, char **argv)
} }
fclose(keys_fd); fclose(keys_fd);
free(mphf_file); free(mphf_file);
free(tmp_dir);
free(source); free(source);
return 0; return 0;
} }