From 7c654b88a85db0c03ffd7267565437bcd758ac64 Mon Sep 17 00:00:00 2001 From: fc_botelho Date: Fri, 23 Sep 2005 20:54:31 +0000 Subject: [PATCH] stable version of BRZ using external memory to flush vector g --- src/Makefile.am | 4 ++-- src/bmz.c | 3 --- src/bmz8.c | 2 -- src/brz.c | 50 ++++++++++++++++++++++++++++++++++++++--------- src/brz_structs.h | 3 ++- src/chm.c | 5 +---- src/cmph.c | 34 +++++++++++++++----------------- src/main.c | 2 +- 8 files changed, 63 insertions(+), 40 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 4c01bc2..1d05357 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,8 +16,8 @@ libcmph_la_SOURCES = debug.h\ cmph_structs.h cmph_structs.c\ chm.h chm_structs.h chm.c\ bmz.h bmz_structs.h bmz.c\ - bmz8.h bmz8_structs.h bmz8.c -# brz.h brz_structs.h brz.c + bmz8.h bmz8_structs.h bmz8.c\ + brz.h brz_structs.h brz.c libcmph_la_LDFLAGS = -version-info 0:0:0 diff --git a/src/bmz.c b/src/bmz.c index f3efc96..2fba0c2 100644 --- a/src/bmz.c +++ b/src/bmz.c @@ -446,11 +446,8 @@ int bmz_dump(cmph_t *mphf, FILE *fd) { char *buf = NULL; cmph_uint32 buflen; - cmph_uint32 nbuflen; - cmph_uint32 i; cmph_uint32 two = 2; //number of hash functions bmz_data_t *data = (bmz_data_t *)mphf->data; - cmph_uint32 nn, nm; __cmph_dump(mphf, fd); fwrite(&two, sizeof(cmph_uint32), 1, fd); diff --git a/src/bmz8.c b/src/bmz8.c index 5b5c275..c4837c2 100644 --- a/src/bmz8.c +++ b/src/bmz8.c @@ -453,10 +453,8 @@ int bmz8_dump(cmph_t *mphf, FILE *fd) { char *buf = NULL; cmph_uint32 buflen; - cmph_uint8 i; cmph_uint8 two = 2; //number of hash functions bmz8_data_t *data = (bmz8_data_t *)mphf->data; - cmph_uint8 nn, nm; __cmph_dump(mphf, fd); fwrite(&two, sizeof(cmph_uint8), 1, fd); diff --git a/src/brz.c b/src/brz.c index 91df769..44dcaa7 100755 --- a/src/brz.c +++ b/src/brz.c @@ -1,4 +1,3 @@ - #include "graph.h" #include "bmz8.h" #include "bmz8_structs.h" @@ -23,7 +22,7 @@ static cmph_uint32 brz_min_index(cmph_uint32 * vector, cmph_uint32 n); static char 
* brz_read_key(FILE * fd); static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys); static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source); - +static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE * fd); brz_config_data_t *brz_config_new() { brz_config_data_t *brz = NULL; @@ -48,7 +47,6 @@ void brz_config_destroy(cmph_config_t *mph) { brz_config_data_t *data = (brz_config_data_t *)mph->data; DEBUGP("Destroying algorithm dependent data\n"); - free(data->tmp_dir); free(data); } @@ -81,12 +79,12 @@ void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) if(tmp_dir[len-1] != '/') { brz->tmp_dir = calloc(len+2, sizeof(cmph_uint8)); - sprintf(brz->tmp_dir, "%s/\0", tmp_dir); + sprintf(brz->tmp_dir, "%s/", tmp_dir); } else { brz->tmp_dir = calloc(len+1, sizeof(cmph_uint8)); - sprintf(brz->tmp_dir, "%s\0", tmp_dir); + sprintf(brz->tmp_dir, "%s", tmp_dir); } } @@ -169,6 +167,8 @@ cmph_t *brz_new(cmph_config_t *mph, float c) brz->size = NULL; //transfer memory ownership brzf->offset = brz->offset; brz->offset = NULL; //transfer memory ownership + brzf->tmp_dir = brz->tmp_dir; + brz->tmp_dir = NULL; //transfer memory ownership brzf->k = brz->k; brzf->c = brz->c; brzf->m = brz->m; @@ -204,6 +204,7 @@ static int brz_gen_graphs(cmph_config_t *mph) cmph_uint32 max_size = 0; cmph_uint32 cur_bucket = 0; cmph_uint8 nkeys_vd = 0; + cmph_uint32 start_index = 0; char ** keys_vd = NULL; @@ -333,7 +334,11 @@ static int brz_gen_graphs(cmph_config_t *mph) tmp_fds = (FILE **)calloc(nflushes, sizeof(FILE *)); buffer_merge = (cmph_uint8 **)calloc(nflushes, sizeof(cmph_uint8 *)); buffer_h3 = (cmph_uint32 *)calloc(nflushes, sizeof(cmph_uint32)); - + filename = (char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); + sprintf(filename, "%stmpg.cmph",brz->tmp_dir); + tmp_fd = fopen(filename, "w"); + free(filename); + memory_usage = 0; for(i = 0; i < nflushes; i++) { filename = 
(char *)calloc(strlen(brz->tmp_dir) + 11, sizeof(char)); @@ -406,6 +411,12 @@ static int brz_gen_graphs(cmph_config_t *mph) mphf_tmp = cmph_new(config); bmzf = (bmz8_data_t *)mphf_tmp->data; brz_copy_partial_mphf(brz, bmzf, cur_bucket, source); + memory_usage += brz->size[cur_bucket]; + if((cur_bucket+1 == brz->k)||(memory_usage > brz->memory_availability)) + { + brz_flush_g(brz, &start_index, tmp_fd); + memory_usage = 0; + } cmph_config_destroy(config); brz_destroy_keys_vd(keys_vd, nkeys_vd); cmph_destroy(mphf_tmp); @@ -413,6 +424,7 @@ static int brz_gen_graphs(cmph_config_t *mph) nkeys_vd = 0; } } + fclose(tmp_fd); for(i = 0; i < nflushes; i++) fclose(tmp_fds[i]); free(tmp_fds); free(keys_vd); @@ -458,6 +470,17 @@ static void brz_destroy_keys_vd(char ** keys_vd, cmph_uint8 nkeys) for(i = 0; i < nkeys; i++) free(keys_vd[i]); } +static void brz_flush_g(brz_config_data_t *brz, cmph_uint32 *start_index, FILE * fd) +{ + while(*start_index < brz->k && brz->g[*start_index] != NULL) + { + fwrite(brz->g[*start_index], sizeof(cmph_uint8), brz->size[*start_index], fd); + free(brz->g[*start_index]); + brz->g[*start_index] = NULL; + *start_index = *start_index + 1; + } +} + static void brz_copy_partial_mphf(brz_config_data_t *brz, bmz8_data_t * bmzf, cmph_uint32 index, cmph_io_adapter_t *source) { cmph_uint32 i; @@ -477,9 +500,14 @@ int brz_dump(cmph_t *mphf, FILE *fd) { char *buf = NULL; cmph_uint32 buflen; - cmph_uint32 nbuflen; cmph_uint32 i; brz_data_t *data = (brz_data_t *)mphf->data; + FILE * tmpg_fd = NULL; + char * filename = NULL; + filename = (char *)calloc(strlen(data->tmp_dir) + 11, sizeof(char)); + sprintf(filename, "%stmpg.cmph",data->tmp_dir); + tmpg_fd = fopen(filename, "rb"); + free(filename); DEBUGP("Dumping brzf\n"); __cmph_dump(mphf, fd); @@ -517,14 +545,17 @@ int brz_dump(cmph_t *mphf, FILE *fd) for(i = 0; i < data->k; i++) { cmph_uint32 n = ceil(data->c * data->size[i]); - fwrite(data->g[i], sizeof(cmph_uint8)*n, 1, fd); + buf = (char *)calloc(n, 
sizeof(cmph_uint8)); + fread(buf, sizeof(cmph_uint8), n, tmpg_fd); + fwrite(buf, sizeof(cmph_uint8), n, fd); + free(buf); } + fclose(tmpg_fd); return 1; } void brz_load(FILE *f, cmph_t *mphf) { - cmph_uint32 nhashes; char *buf = NULL; cmph_uint32 buflen; cmph_uint32 i; @@ -614,6 +645,7 @@ void brz_destroy(cmph_t *mphf) free(data->h2); free(data->size); free(data->offset); + free(data->tmp_dir); free(data); free(mphf); } diff --git a/src/brz_structs.h b/src/brz_structs.h index adee286..e76e717 100755 --- a/src/brz_structs.h +++ b/src/brz_structs.h @@ -13,7 +13,8 @@ struct __brz_data_t cmph_uint32 k; // number of components hash_state_t **h1; hash_state_t **h2; - hash_state_t * h3; + hash_state_t * h3; + cmph_uint8 * tmp_dir; // temporary directory }; struct __brz_config_data_t diff --git a/src/chm.c b/src/chm.c index 3458577..7feec29 100644 --- a/src/chm.c +++ b/src/chm.c @@ -182,7 +182,7 @@ static int chm_gen_edges(cmph_config_t *mph) if (h1 == h2) if (++h2 >= chm->n) h2 = 0; if (h1 == h2) { - if (mph->verbosity) fprintf(stderr, "Self loop for key %e\n", e); + if (mph->verbosity) fprintf(stderr, "Self loop for key %u\n", e); mph->key_source->dispose(mph->key_source->data, key, keylen); return 0; } @@ -201,7 +201,6 @@ int chm_dump(cmph_t *mphf, FILE *fd) { char *buf = NULL; cmph_uint32 buflen; - cmph_uint32 i; cmph_uint32 two = 2; //number of hash functions chm_data_t *data = (chm_data_t *)mphf->data; __cmph_dump(mphf, fd); @@ -234,11 +233,9 @@ int chm_dump(cmph_t *mphf, FILE *fd) void chm_load(FILE *f, cmph_t *mphf) { cmph_uint32 nhashes; - char fbuf[BUFSIZ]; char *buf = NULL; cmph_uint32 buflen; cmph_uint32 i; - hash_state_t *state; chm_data_t *chm = (chm_data_t *)malloc(sizeof(chm_data_t)); DEBUGP("Loading chm mphf\n"); diff --git a/src/cmph.c b/src/cmph.c index 7b73341..d03bc74 100644 --- a/src/cmph.c +++ b/src/cmph.c @@ -3,11 +3,11 @@ #include "chm.h" #include "bmz.h" #include "bmz8.h" /* included -- Fabiano */ -//#include "brz.h" /* included -- Fabiano */ 
+#include "brz.h" /* included -- Fabiano */ #include #include - +#include //#define DEBUG #include "debug.h" @@ -159,7 +159,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) bmz8_config_destroy(mph); break; case CMPH_BRZ: -// brz_config_destroy(mph); + brz_config_destroy(mph); break; default: assert(0); @@ -176,7 +176,7 @@ void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo) mph->data = bmz8_config_new(); break; case CMPH_BRZ: -// mph->data = brz_config_new(); + mph->data = brz_config_new(); break; default: assert(0); @@ -196,7 +196,7 @@ void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir) case CMPH_BMZ8: /* included -- Fabiano */ break; case CMPH_BRZ: /* included -- Fabiano */ -// brz_config_set_tmp_dir(mph, tmp_dir); + brz_config_set_tmp_dir(mph, tmp_dir); break; default: assert(0); @@ -215,7 +215,7 @@ void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_ case CMPH_BMZ8: /* included -- Fabiano */ break; case CMPH_BRZ: /* included -- Fabiano */ -// brz_config_set_memory_availability(mph, memory_availability); + brz_config_set_memory_availability(mph, memory_availability); break; default: assert(0); @@ -238,7 +238,7 @@ void cmph_config_destroy(cmph_config_t *mph) bmz8_config_destroy(mph); break; case CMPH_BRZ: /* included -- Fabiano */ -// brz_config_destroy(mph); + brz_config_destroy(mph); break; default: assert(0); @@ -265,7 +265,7 @@ void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs) bmz8_config_set_hashfuncs(mph, hashfuncs); break; case CMPH_BRZ: /* included -- Fabiano */ -// brz_config_set_hashfuncs(mph, hashfuncs); + brz_config_set_hashfuncs(mph, hashfuncs); break; default: break; @@ -302,9 +302,9 @@ cmph_t *cmph_new(cmph_config_t *mph) mphf = bmz8_new(mph, c); break; case CMPH_BRZ: /* included -- Fabiano */ -/* DEBUGP("Creating brz hash\n"); + DEBUGP("Creating brz hash\n"); if (c == 0) c = 1.15; - mphf = brz_new(mph, c);*/ + mphf = brz_new(mph, c); break; default: 
assert(0); @@ -323,8 +323,7 @@ int cmph_dump(cmph_t *mphf, FILE *f) case CMPH_BMZ8: /* included -- Fabiano */ return bmz8_dump(mphf, f); case CMPH_BRZ: /* included -- Fabiano */ - break; -/* return brz_dump(mphf, f);*/ + return brz_dump(mphf, f); default: assert(0); } @@ -353,8 +352,8 @@ cmph_t *cmph_load(FILE *f) bmz8_load(f, mphf); break; case CMPH_BRZ: /* included -- Fabiano */ -/* DEBUGP("Loading brz algorithm dependent parts\n"); - brz_load(f, mphf);*/ + DEBUGP("Loading brz algorithm dependent parts\n"); + brz_load(f, mphf); break; default: assert(0); @@ -378,9 +377,8 @@ cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) DEBUGP("bmz8 algorithm search\n"); return bmz8_search(mphf, key, keylen); case CMPH_BRZ: /* included -- Fabiano */ - break; -/* DEBUGP("brz algorithm search\n"); - return brz_search(mphf, key, keylen);*/ + DEBUGP("brz algorithm search\n"); + return brz_search(mphf, key, keylen); default: assert(0); } @@ -407,7 +405,7 @@ void cmph_destroy(cmph_t *mphf) bmz8_destroy(mphf); return; case CMPH_BRZ: /* included -- Fabiano */ -/* brz_destroy(mphf);*/ + brz_destroy(mphf); return; default: assert(0); diff --git a/src/main.c b/src/main.c index 701b997..112fdc7 100644 --- a/src/main.c +++ b/src/main.c @@ -143,7 +143,7 @@ int main(int argc, char **argv) break; } } - if (!valid || mph_algo == CMPH_BRZ) + if (!valid) { fprintf(stderr, "Invalid mph algorithm: %s. It is not available in version %s\n", optarg, VERSION); return -1;