Pass the temporary directory on the command line (-d option)
This commit is contained in:
parent
da4ca77b9c
commit
9553f65537
201
src/brz.c
201
src/brz.c
|
@ -20,10 +20,7 @@
|
|||
|
||||
static int brz_gen_graphs(cmph_config_t *mph);
|
||||
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
|
||||
static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd);
|
||||
static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 *memory_usage, cmph_uint32 memory_availability, FILE * graphs_fd);
|
||||
static char * brz_read_key(FILE * fd);
|
||||
static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys);
|
||||
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
|
||||
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source);
|
||||
|
||||
|
@ -40,6 +37,8 @@ brz_config_data_t *brz_config_new()
|
|||
brz->h1 = NULL;
|
||||
brz->h2 = NULL;
|
||||
brz->h3 = NULL;
|
||||
brz->tmp_dir = (cmph_uint8 *)calloc(10, sizeof(cmph_uint8));
|
||||
strcpy(brz->tmp_dir, "/var/tmp/\0");
|
||||
assert(brz);
|
||||
return brz;
|
||||
}
|
||||
|
@ -48,6 +47,7 @@ void brz_config_destroy(cmph_config_t *mph)
|
|||
{
|
||||
brz_config_data_t *data = (brz_config_data_t *)mph->data;
|
||||
DEBUGP("Destroying algorithm dependent data\n");
|
||||
free(data->tmp_dir);
|
||||
free(data);
|
||||
}
|
||||
|
||||
|
@ -63,73 +63,94 @@ void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
|
|||
++i, ++hashptr;
|
||||
}
|
||||
}
|
||||
static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
|
||||
|
||||
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
|
||||
{
|
||||
cmph_uint8 * hashtable = NULL;
|
||||
cmph_uint32 i;
|
||||
hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
|
||||
source->rewind(source->data);
|
||||
memset(hashtable, 0, source->nkeys);
|
||||
//check all keys
|
||||
for (i = 0; i < source->nkeys; ++i)
|
||||
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
||||
if(tmp_dir)
|
||||
{
|
||||
cmph_uint32 h;
|
||||
char *buf;
|
||||
cmph_uint32 buflen = 0;
|
||||
source->read(source->data, &buf, &buflen);
|
||||
h = cmph_search(mphf, buf, buflen);
|
||||
if(hashtable[h])
|
||||
cmph_uint32 len = strlen(tmp_dir);
|
||||
free(brz->tmp_dir);
|
||||
if(tmp_dir[len-1] != '/')
|
||||
{
|
||||
fprintf(stderr, "collision: %u\n",h);
|
||||
return 0;
|
||||
brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8));
|
||||
sprintf(brz->tmp_dir, "%s/\0", tmp_dir);
|
||||
}
|
||||
else
|
||||
{
|
||||
brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8));
|
||||
sprintf(brz->tmp_dir, "%s\0", tmp_dir);
|
||||
}
|
||||
//assert(hashtable[h]==0);
|
||||
hashtable[h] = 1;
|
||||
source->dispose(source->data, buf, buflen);
|
||||
}
|
||||
fprintf(stderr, "\n===============================================================================\n");
|
||||
free(hashtable);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Checks that the function described by (h1, h2, g) maps every key delivered
 * by source to a distinct value.
 *
 * h1, h2 - hash states used to pick two positions in g.
 * g      - table with n entries.
 * n      - modulus applied to both hash values.
 * source - key source; it is rewound and then read source->nkeys times.
 *
 * For each key the two positions are forced apart (h2_v moves to the next
 * slot, wrapping to 0), and the value is (g[h1_v] + g[h2_v]) % source->nkeys.
 *
 * Returns 1 when no two keys share a value; returns 0 on the first collision
 * (reported on stderr) or when the bookkeeping table cannot be allocated.
 */
static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
{
	cmph_uint8 * hashtable = NULL;
	cmph_uint8 ok = 1;
	cmph_uint32 i;

	hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
	if (hashtable == NULL && source->nkeys != 0)
	{
		fprintf(stderr, "brz_verify_mphf1: out of memory\n");
		return 0;
	}
	source->rewind(source->data);
	//check all keys
	for (i = 0; ok && i < source->nkeys; ++i)
	{
		cmph_uint32 h1_v;
		cmph_uint32 h2_v;
		cmph_uint32 h;
		char *buf;
		cmph_uint32 buflen = 0;
		source->read(source->data, &buf, &buflen);

		h1_v = hash(h1, buf, buflen) % n;
		h2_v = hash(h2, buf, buflen) % n;

		/* Both positions must differ; step h2_v forward with wrap-around. */
		if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;

		h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;

		if(hashtable[h])
		{
			fprintf(stderr, "collision: %u\n",h);
			ok = 0;
		}
		else
		{
			hashtable[h] = 1;
		}
		/* Dispose of the key on every path, including the collision one. */
		source->dispose(source->data, buf, buflen);
	}
	free(hashtable);
	return ok;
}
|
||||
// static cmph_uint8 brz_verify_mphf(cmph_t * mphf, cmph_io_adapter_t *source)
|
||||
// {
|
||||
// cmph_uint8 * hashtable = NULL;
|
||||
// cmph_uint32 i;
|
||||
// hashtable = (cmph_uint8*)malloc(source->nkeys*sizeof(cmph_uint8));
|
||||
// source->rewind(source->data);
|
||||
// memset(hashtable, 0, source->nkeys);
|
||||
// //check all keys
|
||||
// for (i = 0; i < source->nkeys; ++i)
|
||||
// {
|
||||
// cmph_uint32 h;
|
||||
// char *buf;
|
||||
// cmph_uint32 buflen = 0;
|
||||
// source->read(source->data, &buf, &buflen);
|
||||
// h = cmph_search(mphf, buf, buflen);
|
||||
// if(hashtable[h])
|
||||
// {
|
||||
// fprintf(stderr, "collision: %u\n",h);
|
||||
// return 0;
|
||||
// }
|
||||
// //assert(hashtable[h]==0);
|
||||
// hashtable[h] = 1;
|
||||
// source->dispose(source->data, buf, buflen);
|
||||
// }
|
||||
// fprintf(stderr, "\n===============================================================================\n");
|
||||
// free(hashtable);
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// static cmph_uint8 brz_verify_mphf1(hash_state_t *h1, hash_state_t *h2, cmph_uint8 * g, cmph_uint32 n, cmph_io_adapter_t *source)
|
||||
// {
|
||||
// cmph_uint8 * hashtable = NULL;
|
||||
// cmph_uint32 i;
|
||||
// hashtable = (cmph_uint8*)calloc(source->nkeys, sizeof(cmph_uint8));
|
||||
// source->rewind(source->data);
|
||||
// //memset(hashtable, 0, source->nkeys);
|
||||
// //check all keys
|
||||
// for (i = 0; i < source->nkeys; ++i)
|
||||
// {
|
||||
// cmph_uint32 h1_v;
|
||||
// cmph_uint32 h2_v;
|
||||
// cmph_uint32 h;
|
||||
// char *buf;
|
||||
// cmph_uint32 buflen = 0;
|
||||
// source->read(source->data, &buf, &buflen);
|
||||
//
|
||||
// h1_v = hash(h1, buf, buflen) % n;
|
||||
//
|
||||
// h2_v = hash(h2, buf, buflen) % n;
|
||||
//
|
||||
// if (h1_v == h2_v && ++h2_v >= n) h2_v = 0;
|
||||
//
|
||||
// h = ((cmph_uint32)g[h1_v] + (cmph_uint32)g[h2_v]) % source->nkeys;
|
||||
//
|
||||
// if(hashtable[h])
|
||||
// {
|
||||
// fprintf(stderr, "collision: %u\n",h);
|
||||
// return 0;
|
||||
// }
|
||||
// //assert(hashtable[h]==0);
|
||||
// hashtable[h] = 1;
|
||||
// source->dispose(source->data, buf, buflen);
|
||||
//
|
||||
// }
|
||||
// free(hashtable);
|
||||
// return 1;
|
||||
// }
|
||||
|
||||
cmph_t *brz_new(cmph_config_t *mph, float c)
|
||||
{
|
||||
|
@ -238,7 +259,7 @@ static int brz_gen_graphs(cmph_config_t *mph)
|
|||
cmph_uint32 h3;
|
||||
FILE * tmp_fd = NULL;
|
||||
FILE ** tmp_fds = NULL;
|
||||
char filename[100];
|
||||
char *filename = NULL;
|
||||
char *key = NULL;
|
||||
cmph_uint32 keylen;
|
||||
|
||||
|
@ -285,9 +306,11 @@ static int brz_gen_graphs(cmph_config_t *mph)
|
|||
memory_usage = memory_usage + keylen1 + 1;
|
||||
}
|
||||
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
|
||||
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
|
||||
/* sprintf(filename, "%u.cmph",nflushes);*/
|
||||
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
|
||||
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
|
||||
tmp_fd = fopen(filename, "wb");
|
||||
free(filename);
|
||||
filename = NULL;
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
{
|
||||
keylen1 = strlen(buffer + keys_index[i]) + 1;
|
||||
|
@ -345,9 +368,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
|
|||
memory_usage = memory_usage + keylen1 + 1;
|
||||
}
|
||||
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",nflushes);
|
||||
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
|
||||
/* sprintf(filename, "%u.cmph",nflushes);*/
|
||||
// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",nflushes);
|
||||
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
|
||||
sprintf(filename, "%s%u.cmph",brz->tmp_dir, nflushes);
|
||||
tmp_fd = fopen(filename, "wb");
|
||||
free(filename);
|
||||
filename = NULL;
|
||||
for(i = 0; i < nkeys_in_buffer; i++)
|
||||
{
|
||||
keylen1 = strlen(buffer + keys_index[i]) + 1;
|
||||
|
@ -376,9 +402,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
|
|||
for(i = 0; i < nflushes; i++)
|
||||
{
|
||||
// sprintf(filename, "/mnt/hd4/fbotelho/%u.cmph",i);
|
||||
sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i);
|
||||
/* sprintf(filename, "%u.cmph",i);*/
|
||||
// sprintf(filename, "/mnt/sd2/fbotelho/dados/%u.cmph",i);
|
||||
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
|
||||
sprintf(filename, "%s%u.cmph",brz->tmp_dir, i);
|
||||
tmp_fds[i] = fopen(filename, "rb");
|
||||
free(filename);
|
||||
filename = NULL;
|
||||
key = brz_read_key(tmp_fds[i]);
|
||||
keylen = strlen(key);
|
||||
h3 = hash(brz->h3, key, keylen) % brz->k;
|
||||
|
@ -474,23 +503,6 @@ static int brz_gen_graphs(cmph_config_t *mph)
|
|||
#pragma pack()
|
||||
}
|
||||
|
||||
/*
 * Writes the first *memory_usage bytes of buffer to graphs_fd and marks the
 * buffer as empty by resetting *memory_usage to 0.
 *
 * The function returns nothing, so a short write is reported on stderr
 * instead of being silently ignored. The counter is reset either way, as
 * before.
 */
static void flush_buffer(cmph_uint8 *buffer, cmph_uint32 *memory_usage, FILE * graphs_fd)
{
	size_t written = fwrite(buffer, 1, *memory_usage, graphs_fd);
	if (written != *memory_usage)
	{
		fprintf(stderr, "flush_buffer: wrote only %lu of %lu bytes\n",
			(unsigned long)written, (unsigned long)*memory_usage);
	}
	*memory_usage = 0;
}
|
||||
|
||||
/*
 * Appends key (keylen bytes plus its terminating byte) to buffer, flushing
 * the buffer to graphs_fd first when the key would push *memory_usage past
 * memory_availability.
 *
 * buffer              - staging area; presumably at least
 *                       memory_availability bytes long -- confirm at caller.
 * key, keylen         - key bytes and their length without the terminator.
 * memory_usage        - in/out: number of bytes currently staged in buffer.
 * memory_availability - staging limit in bytes.
 * graphs_fd           - destination stream.
 *
 * A key that exceeds the limit on its own is written straight to graphs_fd
 * after the flush, so the byte order in the file stays the same and the
 * buffer is never overrun.
 */
static void save_in_disk(cmph_uint8 *buffer, cmph_uint8 * key, cmph_uint32 keylen, cmph_uint32 * memory_usage,
			 cmph_uint32 memory_availability, FILE * graphs_fd)
{
	/* Use size_t so that keylen + 1 cannot wrap in 32-bit arithmetic. */
	size_t needed = (size_t)keylen + 1;

	if((size_t)*memory_usage + needed > memory_availability)
	{
		flush_buffer(buffer, memory_usage, graphs_fd);
	}
	if(needed > memory_availability)
	{
		/* The flush above always ran in this case, so the buffer is empty. */
		if (fwrite(key, 1, needed, graphs_fd) != needed)
		{
			fprintf(stderr, "save_in_disk: short write of oversized key\n");
		}
		return;
	}
	memcpy(buffer + *memory_usage, key, needed);
	*memory_usage = *memory_usage + (cmph_uint32)needed;
}
|
||||
|
||||
static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n)
|
||||
{
|
||||
cmph_uint32 i, min_index = 0;
|
||||
|
@ -521,21 +533,6 @@ static char * brz_read_key(FILE * fd)
|
|||
return buf;
|
||||
}
|
||||
|
||||
/*
 * Reads nkeys keys from graphs_fd with brz_read_key() and returns them as a
 * newly allocated array of newly allocated, exactly sized strings.
 *
 * Returns the array on success. Returns NULL when an allocation fails or a
 * key cannot be read (in case brz_read_key() can return NULL -- its body is
 * not visible here); everything allocated so far is released first.
 */
static char ** brz_read_keys_vd(FILE * graphs_fd, cmph_uint8 nkeys)
{
	char ** keys_vd = (char **)malloc(sizeof(char *)*nkeys);
	cmph_uint8 i;

	if (keys_vd == NULL) return NULL;

	for(i = 0; i < nkeys; i++)
	{
		char * buf = brz_read_key(graphs_fd);
		keys_vd[i] = (buf != NULL) ? (char *)malloc(strlen(buf) + 1) : NULL;
		if (keys_vd[i] == NULL)
		{
			/* Undo the partial result so nothing leaks. */
			free(buf);
			while (i > 0) free(keys_vd[--i]);
			free(keys_vd);
			return NULL;
		}
		strcpy(keys_vd[i], buf);
		free(buf);
	}
	return keys_vd;
}
|
||||
|
||||
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
|
||||
{
|
||||
cmph_uint8 i;
|
||||
|
|
|
@ -8,6 +8,7 @@ typedef struct __brz_config_data_t brz_config_data_t;
|
|||
|
||||
brz_config_data_t *brz_config_new();
|
||||
void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
||||
void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
|
||||
void brz_config_destroy(cmph_config_t *mph);
|
||||
cmph_t *brz_new(cmph_config_t *mph, float c);
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ struct __brz_config_data_t
|
|||
hash_state_t **h1;
|
||||
hash_state_t **h2;
|
||||
hash_state_t * h3;
|
||||
cmph_uint8 * tmp_dir; // temporary directory
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
17
src/cmph.c
17
src/cmph.c
|
@ -178,6 +178,23 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
|
|||
mph->algo = algo;
|
||||
}
|
||||
|
||||
/*
 * Forwards the temporary directory to the algorithm selected in mph.
 *
 * Only CMPH_BRZ consumes the setting (via brz_config_set_tmp_dir). CMPH_CHM
 * and CMPH_BMZ accept the call and do nothing. Any other algorithm value
 * trips assert(0).
 */
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
	if (mph->algo == CMPH_BRZ)
	{
		brz_config_set_tmp_dir(mph, tmp_dir);
		return;
	}

	/* CHM and BMZ have no use for a temporary directory. */
	if (mph->algo == CMPH_CHM || mph->algo == CMPH_BMZ)
	{
		return;
	}

	assert(0);
}
|
||||
|
||||
void cmph_config_destroy(cmph_config_t *mph)
|
||||
{
|
||||
DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
|
||||
|
|
|
@ -35,6 +35,7 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
|
|||
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity);
|
||||
void cmph_config_set_graphsize(cmph_config_t *mph, float c);
|
||||
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
|
||||
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
|
||||
void cmph_config_destroy(cmph_config_t *mph);
|
||||
|
||||
/** Hash API **/
|
||||
|
|
14
src/main.c
14
src/main.c
|
@ -22,12 +22,12 @@
|
|||
|
||||
/*
 * Prints the one-line command synopsis to stderr.
 *
 * prg - program name substituted into the synopsis.
 *
 * Only the current synopsis (the one listing -d tmp_dir) is printed; the
 * stale line without that option is gone, so the usage text appears once.
 */
void usage(const char *prg)
{
	fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
}
|
||||
void usage_long(const char *prg)
|
||||
{
|
||||
cmph_uint32 i;
|
||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-m file.mph] [-a algorithm] keysfile\n", prg);
|
||||
fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-d tmp_dir] [-m file.mph] keysfile\n", prg);
|
||||
fprintf(stderr, "Minimum perfect hashing tool\n\n");
|
||||
fprintf(stderr, " -h\t print this help message\n");
|
||||
fprintf(stderr, " -c\t c value that determines the number of vertices in the graph\n");
|
||||
|
@ -41,6 +41,7 @@ void usage_long(const char *prg)
|
|||
fprintf(stderr, " -g\t generation mode\n");
|
||||
fprintf(stderr, " -s\t random seed\n");
|
||||
fprintf(stderr, " -m\t minimum perfect hash function file \n");
|
||||
fprintf(stderr, " -d\t temporary directory used in brz algorithm \n");
|
||||
fprintf(stderr, " keysfile\t line separated file with keys\n");
|
||||
}
|
||||
|
||||
|
@ -62,12 +63,12 @@ int main(int argc, char **argv)
|
|||
float c = 2.09;
|
||||
cmph_config_t *config = NULL;
|
||||
cmph_t *mphf = NULL;
|
||||
|
||||
cmph_uint8 * tmp_dir = NULL;
|
||||
cmph_io_adapter_t *source;
|
||||
|
||||
while (1)
|
||||
{
|
||||
char ch = getopt(argc, argv, "hVvgc:k:a:f:m:s:");
|
||||
char ch = getopt(argc, argv, "hVvgc:k:a:f:m:d:s:");
|
||||
if (ch == -1) break;
|
||||
switch (ch)
|
||||
{
|
||||
|
@ -107,6 +108,9 @@ int main(int argc, char **argv)
|
|||
case 'm':
|
||||
mphf_file = strdup(optarg);
|
||||
break;
|
||||
case 'd':
|
||||
tmp_dir = strdup(optarg);
|
||||
break;
|
||||
case 'v':
|
||||
++verbosity;
|
||||
break;
|
||||
|
@ -197,6 +201,7 @@ int main(int argc, char **argv)
|
|||
cmph_config_set_algo(config, mph_algo);
|
||||
if (nhashes) cmph_config_set_hashfuncs(config, hashes);
|
||||
cmph_config_set_verbosity(config, verbosity);
|
||||
cmph_config_set_tmp_dir(config, tmp_dir);
|
||||
if(mph_algo == CMPH_BMZ && c >= 2.0) c=1.15;
|
||||
if (c != 0) cmph_config_set_graphsize(config, c);
|
||||
mphf = cmph_new(config);
|
||||
|
@ -263,6 +268,7 @@ int main(int argc, char **argv)
|
|||
}
|
||||
fclose(keys_fd);
|
||||
free(mphf_file);
|
||||
free(tmp_dir);
|
||||
free(source);
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue