Stable version of BRZ using external memory to flush the vector g

This commit is contained in:
fc_botelho 2005-09-23 20:54:31 +00:00
parent e1b7b74776
commit ded7440fd4
8 changed files with 63 additions and 40 deletions

View File

@ -16,8 +16,8 @@ libcmph_la_SOURCES = debug.h\
cmph_structs.h cmph_structs.c\ cmph_structs.h cmph_structs.c\
chm.h chm_structs.h chm.c\ chm.h chm_structs.h chm.c\
bmz.h bmz_structs.h bmz.c\ bmz.h bmz_structs.h bmz.c\
bmz8.h bmz8_structs.h bmz8.c bmz8.h bmz8_structs.h bmz8.c\
# brz.h brz_structs.h brz.c brz.h brz_structs.h brz.c
libcmph_la_LDFLAGS = -version-info 0:0:0 libcmph_la_LDFLAGS = -version-info 0:0:0

View File

@ -446,11 +446,8 @@ int bmz_dump(cmph_t *mphf, FILE *fd)
{ {
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint32 nbuflen;
cmph_uint32 i;
cmph_uint32 two = 2; //number of hash functions cmph_uint32 two = 2; //number of hash functions
bmz_data_t *data = (bmz_data_t *)mphf->data; bmz_data_t *data = (bmz_data_t *)mphf->data;
cmph_uint32 nn, nm;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
fwrite(&two, sizeof(cmph_uint32), 1, fd); fwrite(&two, sizeof(cmph_uint32), 1, fd);

View File

@ -453,10 +453,8 @@ int bmz8_dump(cmph_t *mphf, FILE *fd)
{ {
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint8 i;
cmph_uint8 two = 2; //number of hash functions cmph_uint8 two = 2; //number of hash functions
bmz8_data_t *data = (bmz8_data_t *)mphf->data; bmz8_data_t *data = (bmz8_data_t *)mphf->data;
cmph_uint8 nn, nm;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
fwrite(&two, sizeof(cmph_uint8), 1, fd); fwrite(&two, sizeof(cmph_uint8), 1, fd);

View File

@ -1,4 +1,3 @@
#include "graph.h" #include "graph.h"
#include "bmz8.h" #include "bmz8.h"
#include "bmz8_structs.h" #include "bmz8_structs.h"
@ -23,7 +22,7 @@ static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n);
static char * brz_read_key(FILE * fd); static char * brz_read_key(FILE * fd);
static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys);
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source);
static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE * fd);
brz_config_data_t *brz_config_new() brz_config_data_t *brz_config_new()
{ {
brz_config_data_t *brz = NULL; brz_config_data_t *brz = NULL;
@ -48,7 +47,6 @@ void brz_config_destroy(cmph_config_t *mph)
{ {
brz_config_data_t *data = (brz_config_data_t *)mph->data; brz_config_data_t *data = (brz_config_data_t *)mph->data;
DEBUGP("Destroying algorithm dependent data\n"); DEBUGP("Destroying algorithm dependent data\n");
free(data->tmp_dir);
free(data); free(data);
} }
@ -81,12 +79,12 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
if(tmp_dir[len-1] != '/') if(tmp_dir[len-1] != '/')
{ {
brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8)); brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8));
sprintf(brz->tmp_dir, "%s/\0", tmp_dir); sprintf(brz->tmp_dir, "%s/", tmp_dir);
} }
else else
{ {
brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8)); brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8));
sprintf(brz->tmp_dir, "%s\0", tmp_dir); sprintf(brz->tmp_dir, "%s", tmp_dir);
} }
} }
@ -169,6 +167,8 @@ cmph_t *brz_new(cmph_config_t *mph, float c)
brz->size = NULL; //transfer memory ownership brz->size = NULL; //transfer memory ownership
brzf->offset = brz->offset; brzf->offset = brz->offset;
brz->offset = NULL; //transfer memory ownership brz->offset = NULL; //transfer memory ownership
brzf->tmp_dir = brz->tmp_dir;
brz->tmp_dir = NULL; //transfer memory ownership
brzf->k = brz->k; brzf->k = brz->k;
brzf->c = brz->c; brzf->c = brz->c;
brzf->m = brz->m; brzf->m = brz->m;
@ -204,6 +204,7 @@ static int brz_gen_graphs(cmph_config_t *mph)
cmph_uint32 max_size = 0; cmph_uint32 max_size = 0;
cmph_uint32 cur_bucket = 0; cmph_uint32 cur_bucket = 0;
cmph_uint8 nkeys_vd = 0; cmph_uint8 nkeys_vd = 0;
cmph_uint32 start_index = 0;
char ** keys_vd = NULL; char ** keys_vd = NULL;
@ -333,7 +334,11 @@ static int brz_gen_graphs(cmph_config_t *mph)
tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *)); tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *));
buffer_merge = (cmph_uint8 **)calloc(nflushes, sizeof(cmph_uint8 *)); buffer_merge = (cmph_uint8 **)calloc(nflushes, sizeof(cmph_uint8 *));
buffer_h3 = (cmph_uint32 *)calloc(nflushes, sizeof(cmph_uint32)); buffer_h3 = (cmph_uint32 *)calloc(nflushes, sizeof(cmph_uint32));
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%stmpg.cmph",brz->tmp_dir);
tmp_fd = fopen(filename, "w");
free(filename);
memory_usage = 0;
for(i = 0; i < nflushes; i++) for(i = 0; i < nflushes; i++)
{ {
filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char));
@ -406,6 +411,12 @@ static int brz_gen_graphs(cmph_config_t *mph)
mphf_tmp = cmph_new(config); mphf_tmp = cmph_new(config);
bmzf = (bmz8_data_t *)mphf_tmp->data; bmzf = (bmz8_data_t *)mphf_tmp->data;
brz_copy_partial_mphf(brz, bmzf, cur_bucket, source); brz_copy_partial_mphf(brz, bmzf, cur_bucket, source);
memory_usage += brz->size[cur_bucket];
if((cur_bucket+1 == brz->k)||(memory_usage > brz->memory_availability))
{
brz_flush_g(brz, &start_index, tmp_fd);
memory_usage = 0;
}
cmph_config_destroy(config); cmph_config_destroy(config);
brz_destroy_keys_vd(keys_vd, nkeys_vd); brz_destroy_keys_vd(keys_vd, nkeys_vd);
cmph_destroy(mphf_tmp); cmph_destroy(mphf_tmp);
@ -413,6 +424,7 @@ static int brz_gen_graphs(cmph_config_t *mph)
nkeys_vd = 0; nkeys_vd = 0;
} }
} }
fclose(tmp_fd);
for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]); for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]);
free(tmp_fds); free(tmp_fds);
free(keys_vd); free(keys_vd);
@ -458,6 +470,17 @@ static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys)
for(i = 0; i < nkeys; i++) free(keys_vd[i]); for(i = 0; i < nkeys; i++) free(keys_vd[i]);
} }
/*
 * Flush the contiguous run of populated entries of brz->g to fd.
 *
 * Starting at *start_index, every consecutive non-NULL brz->g[i] (with
 * i < brz->k) is written to fd as brz->size[i] bytes, then freed and reset
 * to NULL. The walk stops at the first NULL entry or at brz->k. On return
 * *start_index holds the index of the first entry that was not flushed.
 *
 * The result of fwrite() is not inspected, so a short write goes unnoticed.
 *
 * NOTE(review): brz_dump() reads ceil(c * size[i]) bytes per bucket from the
 * same temporary file, whereas size[i] bytes are written here -- confirm that
 * both sides agree on the length of each g[i].
 */
static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE * fd)
{
	cmph_uint32 bucket;

	for (bucket = *start_index; bucket < brz->k && brz->g[bucket] != NULL; bucket++)
	{
		cmph_uint8 *g_bucket = brz->g[bucket];

		fwrite(g_bucket, sizeof(cmph_uint8), brz->size[bucket], fd);
		free(g_bucket);
		brz->g[bucket] = NULL; /* marks the bucket as no longer resident in memory */
	}

	/* Hand the resume position back to the caller. */
	*start_index = bucket;
}
static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source) static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source)
{ {
cmph_uint32 i; cmph_uint32 i;
@ -477,9 +500,14 @@ int brz_dump(cmph_t *mphf, FILE *fd)
{ {
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint32 nbuflen;
cmph_uint32 i; cmph_uint32 i;
brz_data_t *data = (brz_data_t *)mphf->data; brz_data_t *data = (brz_data_t *)mphf->data;
FILE * tmpg_fd = NULL;
char * filename = NULL;
filename = (char *)calloc(strlen(data->tmp_dir) + 11, sizeof(char));
sprintf(filename, "%stmpg.cmph",data->tmp_dir);
tmpg_fd = fopen(filename, "rb");
free(filename);
DEBUGP("Dumping brzf\n"); DEBUGP("Dumping brzf\n");
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
@ -517,14 +545,17 @@ int brz_dump(cmph_t *mphf, FILE *fd)
for(i = 0; i < data->k; i++) for(i = 0; i < data->k; i++)
{ {
cmph_uint32 n = ceil(data->c * data->size[i]); cmph_uint32 n = ceil(data->c * data->size[i]);
fwrite(data->g[i], sizeof(cmph_uint8)*n, 1, fd); buf = (char *)calloc(n, sizeof(cmph_uint8));
fread(buf, sizeof(cmph_uint8), n, tmpg_fd);
fwrite(buf, sizeof(cmph_uint8), n, fd);
free(buf);
} }
fclose(tmpg_fd);
return 1; return 1;
} }
void brz_load(FILE *f, cmph_t *mphf) void brz_load(FILE *f, cmph_t *mphf)
{ {
cmph_uint32 nhashes;
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint32 i; cmph_uint32 i;
@ -614,6 +645,7 @@ void brz_destroy(cmph_t *mphf)
free(data->h2); free(data->h2);
free(data->size); free(data->size);
free(data->offset); free(data->offset);
free(data->tmp_dir);
free(data); free(data);
free(mphf); free(mphf);
} }

View File

@ -14,6 +14,7 @@ struct __brz_data_t
hash_state_t **h1; hash_state_t **h1;
hash_state_t **h2; hash_state_t **h2;
hash_state_t * h3; hash_state_t * h3;
cmph_uint8 * tmp_dir; // temporary directory
}; };
struct __brz_config_data_t struct __brz_config_data_t

View File

@ -182,7 +182,7 @@ static int chm_gen_edges(cmph_config_t *mph)
if (h1 == h2) if (++h2 >= chm->n) h2 = 0; if (h1 == h2) if (++h2 >= chm->n) h2 = 0;
if (h1 == h2) if (h1 == h2)
{ {
if (mph->verbosity) fprintf(stderr, "Self loop for key %e\n", e); if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e);
mph->key_source->dispose(mph->key_source->data, key, keylen); mph->key_source->dispose(mph->key_source->data, key, keylen);
return 0; return 0;
} }
@ -201,7 +201,6 @@ int chm_dump(cmph_t *mphf, FILE *fd)
{ {
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint32 i;
cmph_uint32 two = 2; //number of hash functions cmph_uint32 two = 2; //number of hash functions
chm_data_t *data = (chm_data_t *)mphf->data; chm_data_t *data = (chm_data_t *)mphf->data;
__cmph_dump(mphf, fd); __cmph_dump(mphf, fd);
@ -234,11 +233,9 @@ int chm_dump(cmph_t *mphf, FILE *fd)
void chm_load(FILE *f, cmph_t *mphf) void chm_load(FILE *f, cmph_t *mphf)
{ {
cmph_uint32 nhashes; cmph_uint32 nhashes;
char fbuf[BUFSIZ];
char *buf = NULL; char *buf = NULL;
cmph_uint32 buflen; cmph_uint32 buflen;
cmph_uint32 i; cmph_uint32 i;
hash_state_t *state;
chm_data_t *chm = (chm_data_t *)malloc(sizeof(chm_data_t)); chm_data_t *chm = (chm_data_t *)malloc(sizeof(chm_data_t));
DEBUGP("Loading chm mphf\n"); DEBUGP("Loading chm mphf\n");

View File

@ -3,11 +3,11 @@
#include "chm.h" #include "chm.h"
#include "bmz.h" #include "bmz.h"
#include "bmz8.h" /* included -- Fabiano */ #include "bmz8.h" /* included -- Fabiano */
//#include "brz.h" /* included -- Fabiano */ #include "brz.h" /* included -- Fabiano */
#include <stdlib.h> #include <stdlib.h>
#include <assert.h> #include <assert.h>
#include <string.h>
//#define DEBUG //#define DEBUG
#include "debug.h" #include "debug.h"
@ -159,7 +159,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
bmz8_config_destroy(mph); bmz8_config_destroy(mph);
break; break;
case CMPH_BRZ: case CMPH_BRZ:
// brz_config_destroy(mph); brz_config_destroy(mph);
break; break;
default: default:
assert(0); assert(0);
@ -176,7 +176,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
mph->data = bmz8_config_new(); mph->data = bmz8_config_new();
break; break;
case CMPH_BRZ: case CMPH_BRZ:
// mph->data = brz_config_new(); mph->data = brz_config_new();
break; break;
default: default:
assert(0); assert(0);
@ -196,7 +196,7 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
case CMPH_BMZ8: /* included -- Fabiano */ case CMPH_BMZ8: /* included -- Fabiano */
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
// brz_config_set_tmp_dir(mph, tmp_dir); brz_config_set_tmp_dir(mph, tmp_dir);
break; break;
default: default:
assert(0); assert(0);
@ -215,7 +215,7 @@ void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_
case CMPH_BMZ8: /* included -- Fabiano */ case CMPH_BMZ8: /* included -- Fabiano */
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
// brz_config_set_memory_availability(mph, memory_availability); brz_config_set_memory_availability(mph, memory_availability);
break; break;
default: default:
assert(0); assert(0);
@ -238,7 +238,7 @@ void cmph_config_destroy(cmph_config_t *mph)
bmz8_config_destroy(mph); bmz8_config_destroy(mph);
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
// brz_config_destroy(mph); brz_config_destroy(mph);
break; break;
default: default:
assert(0); assert(0);
@ -265,7 +265,7 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
bmz8_config_set_hashfuncs(mph, hashfuncs); bmz8_config_set_hashfuncs(mph, hashfuncs);
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
// brz_config_set_hashfuncs(mph, hashfuncs); brz_config_set_hashfuncs(mph, hashfuncs);
break; break;
default: default:
break; break;
@ -302,9 +302,9 @@ cmph_t *cmph_new(cmph_config_t *mph)
mphf = bmz8_new(mph, c); mphf = bmz8_new(mph, c);
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
/* DEBUGP("Creating brz hash\n"); DEBUGP("Creating brz hash\n");
if (c == 0) c = 1.15; if (c == 0) c = 1.15;
mphf = brz_new(mph, c);*/ mphf = brz_new(mph, c);
break; break;
default: default:
assert(0); assert(0);
@ -323,8 +323,7 @@ int cmph_dump(cmph_t *mphf, FILE *f)
case CMPH_BMZ8: /* included -- Fabiano */ case CMPH_BMZ8: /* included -- Fabiano */
return bmz8_dump(mphf, f); return bmz8_dump(mphf, f);
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
break; return brz_dump(mphf, f);
/* return brz_dump(mphf, f);*/
default: default:
assert(0); assert(0);
} }
@ -353,8 +352,8 @@ cmph_t *cmph_load(FILE *f)
bmz8_load(f, mphf); bmz8_load(f, mphf);
break; break;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
/* DEBUGP("Loading brz algorithm dependent parts\n"); DEBUGP("Loading brz algorithm dependent parts\n");
brz_load(f, mphf);*/ brz_load(f, mphf);
break; break;
default: default:
assert(0); assert(0);
@ -378,9 +377,8 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
DEBUGP("bmz8 algorithm search\n"); DEBUGP("bmz8 algorithm search\n");
return bmz8_search(mphf, key, keylen); return bmz8_search(mphf, key, keylen);
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
break; DEBUGP("brz algorithm search\n");
/* DEBUGP("brz algorithm search\n"); return brz_search(mphf, key, keylen);
return brz_search(mphf, key, keylen);*/
default: default:
assert(0); assert(0);
} }
@ -407,7 +405,7 @@ void cmph_destroy(cmph_t *mphf)
bmz8_destroy(mphf); bmz8_destroy(mphf);
return; return;
case CMPH_BRZ: /* included -- Fabiano */ case CMPH_BRZ: /* included -- Fabiano */
/* brz_destroy(mphf);*/ brz_destroy(mphf);
return; return;
default: default:
assert(0); assert(0);

View File

@ -143,7 +143,7 @@ int main(int argc, char **argv)
break; break;
} }
} }
if (!valid || mph_algo == CMPH_BRZ) if (!valid)
{ {
fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION); fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION);
return -1; return -1;