Merge remote-tracking branch 'sf/githubmaster'
Bring fixes from sourceforge.
This commit is contained in:
commit
b055b8d3cf
@ -4,5 +4,6 @@ pkgconfig_DATA = cmph.pc
|
|||||||
if USE_CXXMPH
|
if USE_CXXMPH
|
||||||
pkgconfig_DATA += cxxmph.pc
|
pkgconfig_DATA += cxxmph.pc
|
||||||
endif
|
endif
|
||||||
|
ACLOCAL_AMFLAGS="-I m4"
|
||||||
|
|
||||||
pkgconfigdir = $(libdir)/pkgconfig
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
|
@ -31,7 +31,7 @@ AC_CHECK_HEADERS([getopt.h math.h])
|
|||||||
dnl Checks for libraries.
|
dnl Checks for libraries.
|
||||||
LT_LIB_M
|
LT_LIB_M
|
||||||
LDFLAGS="$LIBM $LDFLAGS"
|
LDFLAGS="$LIBM $LDFLAGS"
|
||||||
CFLAGS="-Wall"
|
#CFLAGS="-Wall -g"
|
||||||
|
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
CXXFLAGS="-Wall -Wno-unused-function -DNDEBUG -O3 -fomit-frame-pointer $CXXFLAGS"
|
CXXFLAGS="-Wall -Wno-unused-function -DNDEBUG -O3 -fomit-frame-pointer $CXXFLAGS"
|
||||||
@ -40,7 +40,7 @@ if test x$cxxmph = xtrue; then
|
|||||||
AC_COMPILE_STDCXX_0X
|
AC_COMPILE_STDCXX_0X
|
||||||
if test x$ac_cv_cxx_compile_cxx0x_native = "xno"; then
|
if test x$ac_cv_cxx_compile_cxx0x_native = "xno"; then
|
||||||
if test x$ac_cv_cxx_compile_cxx0x_cxx = "xyes"; then
|
if test x$ac_cv_cxx_compile_cxx0x_cxx = "xyes"; then
|
||||||
CXXFLAGS="$CXXFLAGS -std=c++0x"
|
CXXFLAGS="$CXXFLAGS -std=c++11"
|
||||||
elif test x$ac_cv_cxx_compile_cxx0x_gxx = "xyes"; then
|
elif test x$ac_cv_cxx_compile_cxx0x_gxx = "xyes"; then
|
||||||
CXXFLAGS="$CXXFLAGS -std=gnu++0x"
|
CXXFLAGS="$CXXFLAGS -std=gnu++0x"
|
||||||
else
|
else
|
||||||
|
58
cxxmph/.ycm_extra_conf.py
Normal file
58
cxxmph/.ycm_extra_conf.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
import os
|
||||||
|
import ycm_core
|
||||||
|
|
||||||
|
flags = [
|
||||||
|
'-Wall',
|
||||||
|
'-Wextra',
|
||||||
|
'-Werror',
|
||||||
|
'-DNDEBUG',
|
||||||
|
'-DUSE_CLANG_COMPLETER',
|
||||||
|
'-std=c++11',
|
||||||
|
'-x',
|
||||||
|
'c++',
|
||||||
|
'-isystem'
|
||||||
|
'/usr/lib/c++/v1',
|
||||||
|
'-I',
|
||||||
|
'.',
|
||||||
|
]
|
||||||
|
|
||||||
|
def DirectoryOfThisScript():
  """Return the absolute path of the directory that contains this file."""
  script_path = os.path.abspath( __file__ )
  containing_dir, _script_name = os.path.split( script_path )
  return containing_dir
|
||||||
|
|
||||||
|
|
||||||
|
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
  """Return a new list of flags with relative paths joined to a directory.

  Two spellings of the path-taking flags (-isystem, -I, -iquote, --sysroot=)
  are handled:

  * separated: the flag is its own element and the following element is the
    path. The path is joined to working_directory unless it starts with '/'.
  * fused: the path is attached to the flag. The attached part is passed
    through os.path.join( working_directory, ... ).

  If working_directory is empty or None the flags are returned unchanged, as
  a list. Flags that end up as an empty string are dropped.
  """
  if not working_directory:
    return list( flags )

  path_prefixes = ( '-isystem', '-I', '-iquote', '--sysroot=' )
  absolute_flags = []
  next_is_path = False

  for flag in flags:
    resolved = flag

    # The previous element was a bare path flag, so this element is its path.
    if next_is_path:
      next_is_path = False
      if not flag.startswith( '/' ):
        resolved = os.path.join( working_directory, flag )

    # Only the first matching prefix is considered. An exact match is a
    # special case of a prefix match, so a single scan covers both.
    matched = next(
      ( prefix for prefix in path_prefixes if flag.startswith( prefix ) ),
      None )
    if matched is not None:
      if flag == matched:
        next_is_path = True
      else:
        tail = flag[ len( matched ): ]
        resolved = matched + os.path.join( working_directory, tail )

    if resolved:
      absolute_flags.append( resolved )

  return absolute_flags
|
||||||
|
|
||||||
|
|
||||||
|
def FlagsForFile( filename ):
  """Return the compilation flags to use for filename.

  The filename argument is not consulted: every file gets the module-level
  flags list, with relative paths resolved against this script's directory.

  Returns a dict with two keys: 'flags' (the resolved list) and 'do_cache'
  (always True).
  """
  script_dir = DirectoryOfThisScript()
  resolved_flags = MakeRelativePathsInFlagsAbsolute( flags, script_dir )
  return dict( flags = resolved_flags, do_cache = True )
|
@ -52,12 +52,12 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )
|
|||||||
// Block read - if your platform needs to do endian-swapping or can only
|
// Block read - if your platform needs to do endian-swapping or can only
|
||||||
// handle aligned reads, do the conversion here
|
// handle aligned reads, do the conversion here
|
||||||
|
|
||||||
FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
|
/*FORCE_INLINE*/ uint32_t getblock ( const uint32_t * p, int i )
|
||||||
{
|
{
|
||||||
return p[i];
|
return p[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
|
/*FORCE_INLINE*/ uint64_t getblock ( const uint64_t * p, int i )
|
||||||
{
|
{
|
||||||
return p[i];
|
return p[i];
|
||||||
}
|
}
|
||||||
@ -65,7 +65,7 @@ FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
|
|||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// Finalization mix - force all bits of a hash block to avalanche
|
// Finalization mix - force all bits of a hash block to avalanche
|
||||||
|
|
||||||
FORCE_INLINE uint32_t fmix ( uint32_t h )
|
/*FORCE_INLINE*/ uint32_t fmix ( uint32_t h )
|
||||||
{
|
{
|
||||||
h ^= h >> 16;
|
h ^= h >> 16;
|
||||||
h *= 0x85ebca6b;
|
h *= 0x85ebca6b;
|
||||||
@ -78,7 +78,7 @@ FORCE_INLINE uint32_t fmix ( uint32_t h )
|
|||||||
|
|
||||||
//----------
|
//----------
|
||||||
|
|
||||||
FORCE_INLINE uint64_t fmix ( uint64_t k )
|
/*FORCE_INLINE*/ uint64_t fmix ( uint64_t k )
|
||||||
{
|
{
|
||||||
k ^= k >> 33;
|
k ^= k >> 33;
|
||||||
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
|
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
|
||||||
|
@ -12,7 +12,7 @@ using cxxmph::hollow_iterator_base;
|
|||||||
using cxxmph::make_hollow;
|
using cxxmph::make_hollow;
|
||||||
using cxxmph::is_empty;
|
using cxxmph::is_empty;
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int, char**) {
|
||||||
vector<int> v;
|
vector<int> v;
|
||||||
vector<bool> p;
|
vector<bool> p;
|
||||||
for (int i = 0; i < 100; ++i) {
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
@ -39,6 +39,7 @@ namespace cxxmph {
|
|||||||
|
|
||||||
MPHIndex::~MPHIndex() {
|
MPHIndex::~MPHIndex() {
|
||||||
clear();
|
clear();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MPHIndex::clear() {
|
void MPHIndex::clear() {
|
||||||
|
@ -238,7 +238,7 @@ MPH_MAP_INLINE_METHOD_DECL(my_int32_t, index)(const key_type& k) const {
|
|||||||
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
|
MPH_MAP_METHOD_DECL(data_type&, operator[])(const key_type& k) {
|
||||||
return insert(make_pair(k, data_type())).first->second;
|
return insert(make_pair(k, data_type())).first->second;
|
||||||
}
|
}
|
||||||
MPH_MAP_METHOD_DECL(void_type, rehash)(size_type nbuckets) {
|
MPH_MAP_METHOD_DECL(void_type, rehash)(size_type /*nbuckets*/) {
|
||||||
pack();
|
pack();
|
||||||
vector<value_type>(values_.begin(), values_.end()).swap(values_);
|
vector<value_type>(values_.begin(), values_.end()).swap(values_);
|
||||||
vector<bool>(present_.begin(), present_.end()).swap(present_);
|
vector<bool>(present_.begin(), present_.end()).swap(present_);
|
||||||
|
@ -15,7 +15,7 @@ namespace cxxmph {
|
|||||||
struct h128 {
|
struct h128 {
|
||||||
const uint32_t& operator[](uint8_t i) const { return uint32[i]; }
|
const uint32_t& operator[](uint8_t i) const { return uint32[i]; }
|
||||||
uint32_t& operator[](uint8_t i) { return uint32[i]; }
|
uint32_t& operator[](uint8_t i) { return uint32[i]; }
|
||||||
const uint64_t get64(bool second) const { return (static_cast<uint64_t>(uint32[second << 1]) << 32) | uint32[1 + (second << 1)]; }
|
uint64_t get64(bool second) const { return (static_cast<uint64_t>(uint32[second << 1]) << 32) | uint32[1 + (second << 1)]; }
|
||||||
void set64(uint64_t v, bool second) { uint32[second << 1] = v >> 32; uint32[1+(second<<1)] = ((v << 32) >> 32); }
|
void set64(uint64_t v, bool second) { uint32[second << 1] = v >> 32; uint32[1+(second<<1)] = ((v << 32) >> 32); }
|
||||||
bool operator==(const h128 rhs) const { return memcmp(uint32, rhs.uint32, sizeof(uint32)) == 0; }
|
bool operator==(const h128 rhs) const { return memcmp(uint32, rhs.uint32, sizeof(uint32)) == 0; }
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2 struct_vector_adapter_ex3
|
noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2 struct_vector_adapter_ex3 small_set_ex4
|
||||||
|
|
||||||
INCLUDES = -I../src/
|
AM_CPPFLAGS = -I../src/
|
||||||
|
|
||||||
vector_adapter_ex1_LDADD = ../src/libcmph.la
|
vector_adapter_ex1_LDADD = ../src/libcmph.la
|
||||||
vector_adapter_ex1_SOURCES = vector_adapter_ex1.c
|
vector_adapter_ex1_SOURCES = vector_adapter_ex1.c
|
||||||
@ -10,3 +10,6 @@ file_adapter_ex2_SOURCES = file_adapter_ex2.c
|
|||||||
|
|
||||||
struct_vector_adapter_ex3_LDADD = ../src/libcmph.la
|
struct_vector_adapter_ex3_LDADD = ../src/libcmph.la
|
||||||
struct_vector_adapter_ex3_SOURCES = struct_vector_adapter_ex3.c
|
struct_vector_adapter_ex3_SOURCES = struct_vector_adapter_ex3.c
|
||||||
|
|
||||||
|
small_set_ex4_LDADD = ../src/libcmph.la
|
||||||
|
small_set_ex4_SOURCES = small_set_ex4.c
|
||||||
|
105
examples/small_set_ex4.c
Normal file
105
examples/small_set_ex4.c
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
#include <cmph.h>
|
||||||
|
|
||||||
|
/*
 * Build a minimal perfect hash function over `items_to_hash` using algorithm
 * `alg_n`, then print its packed size and the value every key hashes to.
 *
 * items_to_hash: array of keys; each cmph_uint32 is hashed as a raw
 *                sizeof(cmph_uint32)-byte key.
 * items_len:     number of keys in the array.
 * alg_n:         algorithm to exercise (index into cmph_names).
 *
 * Returns 0 on success, 1 if the function could not be built or (for BRZ)
 * the backing file could not be opened or reloaded.
 */
int test(cmph_uint32* items_to_hash, cmph_uint32 items_len, CMPH_ALGO alg_n)
{
    cmph_t *hash = NULL;
    cmph_config_t *config;
    cmph_io_adapter_t *source;
    cmph_uint32 i;
    char filename[256];
    FILE* mphf_fd = NULL;
    int status = 1;

    printf("%s (%u)\n", cmph_names[alg_n], (unsigned)alg_n);

    source = cmph_io_struct_vector_adapter(items_to_hash,
                                           (cmph_uint32)sizeof(cmph_uint32),
                                           0,
                                           (cmph_uint32)sizeof(cmph_uint32),
                                           items_len);
    if (source == NULL) {
        fprintf(stderr, "Unable to create key source\n");
        return 1;
    }

    config = cmph_config_new(source);
    if (config == NULL) {
        fprintf(stderr, "Unable to create configuration\n");
        goto cleanup;
    }
    cmph_config_set_algo(config, alg_n);
    if (alg_n == CMPH_BRZ) {
        /* BRZ dumps pieces of the function to disk while it is being built,
         * so the output file has to be configured before cmph_new. The file
         * holds binary data, hence the "b" mode. */
        snprintf(filename, sizeof(filename), "%s_%u.mph", cmph_names[alg_n], items_len);
        mphf_fd = fopen(filename, "wb");
        if (mphf_fd == NULL) {
            perror(filename);
            cmph_config_destroy(config);
            goto cleanup;
        }
        cmph_config_set_mphf_fd(config, mphf_fd);
    }
    hash = cmph_new(config);
    cmph_config_destroy(config);
    if (hash == NULL) {
        fprintf(stderr, "Unable to build MPHF with %s for %u keys\n",
                cmph_names[alg_n], items_len);
        goto cleanup;
    }

    if (alg_n == CMPH_BRZ) {
        /* A BRZ function is only usable after its dump has been finalized
         * and the result loaded back from the file. */
        cmph_dump(hash, mphf_fd);
        cmph_destroy(hash);
        hash = NULL;
        fclose(mphf_fd);
        mphf_fd = fopen(filename, "rb");
        if (mphf_fd == NULL) {
            perror(filename);
            goto cleanup;
        }
        hash = cmph_load(mphf_fd);
        if (hash == NULL) {
            fprintf(stderr, "Unable to load MPHF from %s\n", filename);
            goto cleanup;
        }
    }
    printf("packed_size %u\n", cmph_packed_size(hash));

    for (i = 0; i < items_len; ++i)
        printf("%u -> %u\n",
               items_to_hash[i],
               cmph_search(hash,
                           (char*)(items_to_hash + i),
                           (cmph_uint32)sizeof(cmph_uint32)));
    printf("\n");
    status = 0;

cleanup:
    /* The adapter came from cmph_io_struct_vector_adapter, so it is released
     * with the matching struct-vector destroy function. */
    cmph_io_struct_vector_adapter_destroy(source);
    if (hash != NULL) {
        cmph_destroy(hash);
    }
    if (mphf_fd != NULL) {
        fclose(mphf_fd);
    }
    return status;
}
|
||||||
|
|
||||||
|
int main (void)
|
||||||
|
{
|
||||||
|
cmph_uint32 vec1[] = {1,2,3,4,5};
|
||||||
|
cmph_uint32 vec1_len = 5;
|
||||||
|
|
||||||
|
cmph_uint32 vec2[] = {7576423, 7554496}; //CMPH_FCH, CMPH_BDZ, CMPH_BDZ_PH (4,5,6)
|
||||||
|
cmph_uint32 vec2_len = 2;
|
||||||
|
cmph_uint32 vec3[] = {2184764, 1882984, 1170551}; // CMPH_CHD_PH, CMPH_CHD (7,8)
|
||||||
|
cmph_uint32 vec3_len = 3;
|
||||||
|
cmph_uint32 vec4[] = {2184764}; // CMPH_CHD_PH, CMPH_CHD (7,8)
|
||||||
|
cmph_uint32 vec4_len = 1;
|
||||||
|
cmph_uint32 i;
|
||||||
|
|
||||||
|
// Testing with vec1
|
||||||
|
cmph_uint32* values = (cmph_uint32*)vec1;
|
||||||
|
cmph_uint32 length = vec1_len;
|
||||||
|
printf("TESTING VECTOR WITH %u INTEGERS\n", length);
|
||||||
|
for (i = 0; i < CMPH_COUNT; i++)
|
||||||
|
{
|
||||||
|
test(values, length, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Testing with vec2
|
||||||
|
values = (cmph_uint32*)vec2;
|
||||||
|
length = vec2_len;
|
||||||
|
printf("TESTING VECTOR WITH %u INTEGERS\n", length);
|
||||||
|
for (i = 0; i < CMPH_COUNT; i++)
|
||||||
|
{
|
||||||
|
test(values, length, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Testing with vec3
|
||||||
|
values = (cmph_uint32*)vec3;
|
||||||
|
length = vec3_len;
|
||||||
|
printf("TESTING VECTOR WITH %u INTEGERS\n", length);
|
||||||
|
for (i = 0; i < CMPH_COUNT; i++)
|
||||||
|
{
|
||||||
|
test(values, length, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Testing with vec4
|
||||||
|
values = (cmph_uint32*)vec4;
|
||||||
|
length = vec4_len;
|
||||||
|
printf("TESTING VECTOR WITH %u INTEGERS\n", length);
|
||||||
|
for (i = 0; i < CMPH_COUNT; i++)
|
||||||
|
{
|
||||||
|
test(values, length, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -288,6 +288,11 @@ cmph_t *bdz_new(cmph_config_t *mph, double c)
|
|||||||
bdz->m = mph->key_source->nkeys;
|
bdz->m = mph->key_source->nkeys;
|
||||||
bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
|
bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
|
||||||
if ((bdz->r % 2) == 0) bdz->r+=1;
|
if ((bdz->r % 2) == 0) bdz->r+=1;
|
||||||
|
|
||||||
|
if (bdz->r == 1) { // workaround for small key sets
|
||||||
|
bdz->r = 3;
|
||||||
|
}
|
||||||
|
|
||||||
bdz->n = 3*bdz->r;
|
bdz->n = 3*bdz->r;
|
||||||
|
|
||||||
bdz->k = (1U << bdz->b);
|
bdz->k = (1U << bdz->b);
|
||||||
|
@ -254,6 +254,11 @@ cmph_t *bdz_ph_new(cmph_config_t *mph, double c)
|
|||||||
bdz_ph->m = mph->key_source->nkeys;
|
bdz_ph->m = mph->key_source->nkeys;
|
||||||
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
|
bdz_ph->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
|
||||||
if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
|
if ((bdz_ph->r % 2) == 0) bdz_ph->r += 1;
|
||||||
|
|
||||||
|
if (bdz_ph->r == 1) { // workaround for small key sets
|
||||||
|
bdz_ph->r = 3;
|
||||||
|
}
|
||||||
|
|
||||||
bdz_ph->n = 3*bdz_ph->r;
|
bdz_ph->n = 3*bdz_ph->r;
|
||||||
|
|
||||||
|
|
||||||
|
10
src/bmz.c
10
src/bmz.c
@ -70,6 +70,12 @@ cmph_t *bmz_new(cmph_config_t *mph, double c)
|
|||||||
DEBUGP("c: %f\n", c);
|
DEBUGP("c: %f\n", c);
|
||||||
bmz->m = mph->key_source->nkeys;
|
bmz->m = mph->key_source->nkeys;
|
||||||
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
|
bmz->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
|
||||||
|
|
||||||
|
if (bmz->n < 5) // workaround for small key sets
|
||||||
|
{
|
||||||
|
bmz->n = 5;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
|
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz->m, bmz->n, c);
|
||||||
bmz->graph = graph_new(bmz->n, bmz->m);
|
bmz->graph = graph_new(bmz->n, bmz->m);
|
||||||
DEBUGP("Created graph\n");
|
DEBUGP("Created graph\n");
|
||||||
@ -530,7 +536,7 @@ cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
|
|||||||
cmph_uint32 h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
|
cmph_uint32 h1 = hash(bmz->hashes[0], key, keylen) % bmz->n;
|
||||||
cmph_uint32 h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
|
cmph_uint32 h2 = hash(bmz->hashes[1], key, keylen) % bmz->n;
|
||||||
DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
|
DEBUGP("key: %.*s h1: %u h2: %u\n", keylen, key, h1, h2);
|
||||||
if (h1 == h2 && ++h2 > bmz->n) h2 = 0;
|
if (h1 == h2 && ++h2 >= bmz->n) h2 = 0;
|
||||||
DEBUGP("key: %.*s g[h1]: %u g[h2]: %u edges: %u\n", keylen, key, bmz->g[h1], bmz->g[h2], bmz->m);
|
DEBUGP("key: %.*s g[h1]: %u g[h2]: %u edges: %u\n", keylen, key, bmz->g[h1], bmz->g[h2], bmz->m);
|
||||||
return bmz->g[h1] + bmz->g[h2];
|
return bmz->g[h1] + bmz->g[h2];
|
||||||
}
|
}
|
||||||
@ -620,6 +626,6 @@ cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 ke
|
|||||||
|
|
||||||
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
register cmph_uint32 h1 = hash_packed(h1_ptr, h1_type, key, keylen) % n;
|
||||||
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
register cmph_uint32 h2 = hash_packed(h2_ptr, h2_type, key, keylen) % n;
|
||||||
if (h1 == h2 && ++h2 > n) h2 = 0;
|
if (h1 == h2 && ++h2 >= n) h2 = 0;
|
||||||
return (g_ptr[h1] + g_ptr[h2]);
|
return (g_ptr[h1] + g_ptr[h2]);
|
||||||
}
|
}
|
||||||
|
@ -74,6 +74,12 @@ cmph_t *bmz8_new(cmph_config_t *mph, double c)
|
|||||||
DEBUGP("c: %f\n", c);
|
DEBUGP("c: %f\n", c);
|
||||||
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
|
bmz8->m = (cmph_uint8) mph->key_source->nkeys;
|
||||||
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
|
bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
|
||||||
|
|
||||||
|
if (bmz8->n < 5) // workaround for small key sets
|
||||||
|
{
|
||||||
|
bmz8->n = 5;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
|
DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
|
||||||
bmz8->graph = graph_new(bmz8->n, bmz8->m);
|
bmz8->graph = graph_new(bmz8->n, bmz8->m);
|
||||||
DEBUGP("Created graph\n");
|
DEBUGP("Created graph\n");
|
||||||
|
44
src/brz.c
44
src/brz.c
@ -131,6 +131,15 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
|
|||||||
|
|
||||||
DEBUGP("c: %f\n", c);
|
DEBUGP("c: %f\n", c);
|
||||||
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
brz_config_data_t *brz = (brz_config_data_t *)mph->data;
|
||||||
|
|
||||||
|
// Since we keep dumping partial pieces of the MPHF as it gets created
|
||||||
|
// the caller must set the file to store the resulting MPHF before calling
|
||||||
|
// this function.
|
||||||
|
if (brz->mphf_fd == NULL)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
switch(brz->algo) // validating restrictions over parameter c.
|
switch(brz->algo) // validating restrictions over parameter c.
|
||||||
{
|
{
|
||||||
case CMPH_BMZ8:
|
case CMPH_BMZ8:
|
||||||
@ -144,6 +153,11 @@ cmph_t *brz_new(cmph_config_t *mph, double c)
|
|||||||
}
|
}
|
||||||
brz->c = c;
|
brz->c = c;
|
||||||
brz->m = mph->key_source->nkeys;
|
brz->m = mph->key_source->nkeys;
|
||||||
|
if (brz->m < 5)
|
||||||
|
{
|
||||||
|
brz->c = 5;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUGP("m: %u\n", brz->m);
|
DEBUGP("m: %u\n", brz->m);
|
||||||
brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
|
brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
|
||||||
DEBUGP("k: %u\n", brz->k);
|
DEBUGP("k: %u\n", brz->k);
|
||||||
@ -364,7 +378,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
{
|
{
|
||||||
fprintf(stderr, "\nMPHF generation \n");
|
fprintf(stderr, "\nMPHF generation \n");
|
||||||
}
|
}
|
||||||
/* Starting to dump to disk the resultant MPHF: __cmph_dump function */
|
/* Starting to dump to disk the resulting MPHF: __cmph_dump function */
|
||||||
nbytes = fwrite(cmph_names[CMPH_BRZ], (size_t)(strlen(cmph_names[CMPH_BRZ]) + 1), (size_t)1, brz->mphf_fd);
|
nbytes = fwrite(cmph_names[CMPH_BRZ], (size_t)(strlen(cmph_names[CMPH_BRZ]) + 1), (size_t)1, brz->mphf_fd);
|
||||||
nbytes = fwrite(&(brz->m), sizeof(brz->m), (size_t)1, brz->mphf_fd);
|
nbytes = fwrite(&(brz->m), sizeof(brz->m), (size_t)1, brz->mphf_fd);
|
||||||
nbytes = fwrite(&(brz->c), sizeof(double), (size_t)1, brz->mphf_fd);
|
nbytes = fwrite(&(brz->c), sizeof(double), (size_t)1, brz->mphf_fd);
|
||||||
@ -442,7 +456,7 @@ static int brz_gen_mphf(cmph_config_t *mph)
|
|||||||
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
source = cmph_io_byte_vector_adapter(keys_vd, (cmph_uint32)nkeys_vd);
|
||||||
config = cmph_config_new(source);
|
config = cmph_config_new(source);
|
||||||
cmph_config_set_algo(config, brz->algo);
|
cmph_config_set_algo(config, brz->algo);
|
||||||
//cmph_config_set_algo(config, CMPH_BMZ8);
|
cmph_config_set_hashfuncs(config, brz->hashfuncs);
|
||||||
cmph_config_set_graphsize(config, brz->c);
|
cmph_config_set_graphsize(config, brz->c);
|
||||||
mphf_tmp = cmph_new(config);
|
mphf_tmp = cmph_new(config);
|
||||||
if (mphf_tmp == NULL)
|
if (mphf_tmp == NULL)
|
||||||
@ -565,7 +579,7 @@ int brz_dump(cmph_t *mphf, FILE *fd)
|
|||||||
cmph_uint32 buflen;
|
cmph_uint32 buflen;
|
||||||
register size_t nbytes;
|
register size_t nbytes;
|
||||||
DEBUGP("Dumping brzf\n");
|
DEBUGP("Dumping brzf\n");
|
||||||
// The initial part of the MPHF have already been dumped to disk during construction
|
// The initial part of the MPHF has already been dumped to disk during construction
|
||||||
// Dumping h0
|
// Dumping h0
|
||||||
hash_state_dump(data->h0, &buf, &buflen);
|
hash_state_dump(data->h0, &buf, &buflen);
|
||||||
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
|
||||||
@ -731,6 +745,12 @@ void brz_pack(cmph_t *mphf, void *packed_mphf)
|
|||||||
cmph_uint8 * ptr = (cmph_uint8 *)packed_mphf;
|
cmph_uint8 * ptr = (cmph_uint8 *)packed_mphf;
|
||||||
cmph_uint32 i,n;
|
cmph_uint32 i,n;
|
||||||
|
|
||||||
|
// This assumes that if one function pointer is NULL,
|
||||||
|
// all the others will be as well.
|
||||||
|
if (data->h1 == NULL)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
// packing internal algo type
|
// packing internal algo type
|
||||||
memcpy(ptr, &(data->algo), sizeof(data->algo));
|
memcpy(ptr, &(data->algo), sizeof(data->algo));
|
||||||
ptr += sizeof(data->algo);
|
ptr += sizeof(data->algo);
|
||||||
@ -821,9 +841,21 @@ cmph_uint32 brz_packed_size(cmph_t *mphf)
|
|||||||
cmph_uint32 i;
|
cmph_uint32 i;
|
||||||
cmph_uint32 size = 0;
|
cmph_uint32 size = 0;
|
||||||
brz_data_t *data = (brz_data_t *)mphf->data;
|
brz_data_t *data = (brz_data_t *)mphf->data;
|
||||||
CMPH_HASH h0_type = hash_get_type(data->h0);
|
CMPH_HASH h0_type;
|
||||||
CMPH_HASH h1_type = hash_get_type(data->h1[0]);
|
CMPH_HASH h1_type;
|
||||||
CMPH_HASH h2_type = hash_get_type(data->h2[0]);
|
CMPH_HASH h2_type;
|
||||||
|
|
||||||
|
// This assumes that if one function pointer is NULL,
|
||||||
|
// all the others will be as well.
|
||||||
|
if (data->h1 == NULL)
|
||||||
|
{
|
||||||
|
return 0U;
|
||||||
|
}
|
||||||
|
|
||||||
|
h0_type = hash_get_type(data->h0);
|
||||||
|
h1_type = hash_get_type(data->h1[0]);
|
||||||
|
h2_type = hash_get_type(data->h2[0]);
|
||||||
|
|
||||||
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
|
size = (cmph_uint32)(2*sizeof(CMPH_ALGO) + 3*sizeof(CMPH_HASH) + hash_state_packed_size(h0_type) + sizeof(cmph_uint32) +
|
||||||
sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
|
sizeof(double) + sizeof(cmph_uint8)*data->k + sizeof(cmph_uint32)*data->k);
|
||||||
// pointers to g_is
|
// pointers to g_is
|
||||||
|
15
src/brz.h
15
src/brz.h
@ -3,6 +3,21 @@
|
|||||||
|
|
||||||
#include "cmph.h"
|
#include "cmph.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The BRZ algorithm has been built so as to consume the bare minimum
|
||||||
|
* amount of memory to generate the MPHFs. Therefore we decided
|
||||||
|
* to dump the resulting MPHFs to disk while creating them. Thus,
|
||||||
|
* to use the BRZ algorithm, one has to call brz_config_set_mphf_fd
|
||||||
|
* before calling brz_new. Otherwise we will fail the MPHF creation.
|
||||||
|
* One side effect of this design decision is that the resulting
|
||||||
|
* MPHF cannot be used until its dumping process is finalized
|
||||||
|
* by calling brz_dump and the caller must use brz_load before
|
||||||
|
* any call to either one of the following functions is made:
|
||||||
|
* brz_search
|
||||||
|
* brz_pack
|
||||||
|
* brz_packed_size
|
||||||
|
* brz_search_packed
|
||||||
|
*/
|
||||||
typedef struct __brz_data_t brz_data_t;
|
typedef struct __brz_data_t brz_data_t;
|
||||||
typedef struct __brz_config_data_t brz_config_data_t;
|
typedef struct __brz_config_data_t brz_config_data_t;
|
||||||
|
|
||||||
|
11
src/chd_ph.c
11
src/chd_ph.c
@ -627,7 +627,8 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
|||||||
|
|
||||||
register double load_factor = c;
|
register double load_factor = c;
|
||||||
register cmph_uint8 searching_success = 0;
|
register cmph_uint8 searching_success = 0;
|
||||||
register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
|
register cmph_uint32 max_probes_default = 1 << 20; // default value for max_probes
|
||||||
|
register cmph_uint32 max_probes;
|
||||||
register cmph_uint32 iterations = 100;
|
register cmph_uint32 iterations = 100;
|
||||||
chd_ph_bucket_t * buckets = NULL;
|
chd_ph_bucket_t * buckets = NULL;
|
||||||
chd_ph_item_t * items = NULL;
|
chd_ph_item_t * items = NULL;
|
||||||
@ -688,7 +689,13 @@ cmph_t *chd_ph_new(cmph_config_t *mph, double c)
|
|||||||
buckets = chd_ph_bucket_new(chd_ph->nbuckets);
|
buckets = chd_ph_bucket_new(chd_ph->nbuckets);
|
||||||
items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
|
items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));
|
||||||
|
|
||||||
max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
|
max_probes = (cmph_uint32)((log(chd_ph->m)/log(2))/20);
|
||||||
|
|
||||||
|
if (max_probes == 0) {
|
||||||
|
max_probes = max_probes_default;
|
||||||
|
} else {
|
||||||
|
max_probes = max_probes * max_probes_default;
|
||||||
|
}
|
||||||
|
|
||||||
if(chd_ph->keys_per_bin == 1)
|
if(chd_ph->keys_per_bin == 1)
|
||||||
chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
|
chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
|
||||||
|
@ -96,11 +96,15 @@ static int key_struct_vector_read(void *data, char **key, cmph_uint32 *keylen)
|
|||||||
{
|
{
|
||||||
cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
|
cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
|
||||||
char *keys_vd = (char *)cmph_struct_vector->vector;
|
char *keys_vd = (char *)cmph_struct_vector->vector;
|
||||||
|
cmph_uint64 keys_vd_offset;
|
||||||
size_t size;
|
size_t size;
|
||||||
*keylen = cmph_struct_vector->key_len;
|
*keylen = cmph_struct_vector->key_len;
|
||||||
size = *keylen;
|
size = *keylen;
|
||||||
*key = (char *)malloc(size);
|
*key = (char *)malloc(size);
|
||||||
memcpy(*key, (keys_vd + (cmph_struct_vector->position * cmph_struct_vector->struct_size) + cmph_struct_vector->key_offset), size);
|
keys_vd_offset = ((cmph_uint64)cmph_struct_vector->position *
|
||||||
|
(cmph_uint64)cmph_struct_vector->struct_size) +
|
||||||
|
(cmph_uint64)cmph_struct_vector->key_offset;
|
||||||
|
memcpy(*key, keys_vd + keys_vd_offset, size);
|
||||||
cmph_struct_vector->position = cmph_struct_vector->position + 1;
|
cmph_struct_vector->position = cmph_struct_vector->position + 1;
|
||||||
return (int)(*keylen);
|
return (int)(*keylen);
|
||||||
}
|
}
|
||||||
|
@ -38,13 +38,18 @@ cmph_t *__cmph_load(FILE *f)
|
|||||||
register size_t nbytes;
|
register size_t nbytes;
|
||||||
|
|
||||||
DEBUGP("Loading mphf\n");
|
DEBUGP("Loading mphf\n");
|
||||||
while(1)
|
for(i = 0; i < BUFSIZ; i++)
|
||||||
{
|
{
|
||||||
size_t c = fread(ptr, (size_t)1, (size_t)1, f);
|
size_t c = fread(ptr, (size_t)1, (size_t)1, f);
|
||||||
if (c != 1) return NULL;
|
if (c != 1) return NULL;
|
||||||
if (*ptr == 0) break;
|
if (*ptr == 0) break;
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
|
if(algo_name[i] != 0)
|
||||||
|
{
|
||||||
|
DEBUGP("Attempted buffer overflow while loading mph file\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
for(i = 0; i < CMPH_COUNT; ++i)
|
for(i = 0; i < CMPH_COUNT; ++i)
|
||||||
{
|
{
|
||||||
if (strcmp(algo_name, cmph_names[i]) == 0)
|
if (strcmp(algo_name, cmph_names[i]) == 0)
|
||||||
|
@ -98,7 +98,7 @@ void jenkins_state_destroy(jenkins_state_t *state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
static inline void __jenkins_hash_vector(cmph_uint32 seed, const unsigned char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
{
|
{
|
||||||
register cmph_uint32 len, length;
|
register cmph_uint32 len, length;
|
||||||
|
|
||||||
@ -154,7 +154,7 @@ static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_u
|
|||||||
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
|
cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
cmph_uint32 hashes[3];
|
cmph_uint32 hashes[3];
|
||||||
__jenkins_hash_vector(state->seed, k, keylen, hashes);
|
__jenkins_hash_vector(state->seed, (const unsigned char*)k, keylen, hashes);
|
||||||
return hashes[2];
|
return hashes[2];
|
||||||
/* cmph_uint32 a, b, c;
|
/* cmph_uint32 a, b, c;
|
||||||
cmph_uint32 len, length;
|
cmph_uint32 len, length;
|
||||||
@ -215,7 +215,7 @@ cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keyl
|
|||||||
|
|
||||||
void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
{
|
{
|
||||||
__jenkins_hash_vector(state->seed, k, keylen, hashes);
|
__jenkins_hash_vector(state->seed, (const unsigned char*)k, keylen, hashes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
|
void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
|
||||||
@ -282,7 +282,7 @@ cmph_uint32 jenkins_state_packed_size(void)
|
|||||||
cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
|
cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
|
||||||
{
|
{
|
||||||
cmph_uint32 hashes[3];
|
cmph_uint32 hashes[3];
|
||||||
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
|
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), (const unsigned char*)k, keylen, hashes);
|
||||||
return hashes[2];
|
return hashes[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -294,5 +294,5 @@ cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32
|
|||||||
*/
|
*/
|
||||||
void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
|
||||||
{
|
{
|
||||||
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
|
__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), (const unsigned char*)k, keylen, hashes);
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ TESTS = $(check_PROGRAMS)
|
|||||||
check_PROGRAMS = graph_tests select_tests compressed_seq_tests compressed_rank_tests cmph_benchmark_test
|
check_PROGRAMS = graph_tests select_tests compressed_seq_tests compressed_rank_tests cmph_benchmark_test
|
||||||
noinst_PROGRAMS = packed_mphf_tests mphf_tests
|
noinst_PROGRAMS = packed_mphf_tests mphf_tests
|
||||||
|
|
||||||
INCLUDES = -I../src/
|
AM_CPPFLAGS = -I../src/
|
||||||
|
|
||||||
graph_tests_SOURCES = graph_tests.c
|
graph_tests_SOURCES = graph_tests.c
|
||||||
graph_tests_LDADD = ../src/libcmph.la
|
graph_tests_LDADD = ../src/libcmph.la
|
||||||
|
Loading…
Reference in New Issue
Block a user