// Review notes. These lines sit ahead of the first "diff --git" header and are
// not part of any hunk.
//
// What the patch does:
//   * cxxmph/Makefile.am: adds mph_bits_test to check_PROGRAMS and declares
//     mph_bits_test_SOURCES (mph_bits_test.cc) and mph_bits_test_LDADD
//     (libcxxmph.la).
//   * cxxmph/mph_bits.cc: defines dynamic_2bitset::vmask as
//     { 0xfc, 0xf3, 0xcf, 0x3f }.
//   * cxxmph/mph_bits.h: removes the file-static valuemask table, adds class
//     dynamic_2bitset, and makes set_2bit_value use dynamic_2bitset::vmask.
//     get()/set() address entry i as byte (i >> 2), bit offset ((i & 3) << 1),
//     i.e. four 2-bit entries per byte.
//   * cxxmph/mph_map.h: rewords the TODO above the slack_type typedef.
//
// Points to check in dynamic_2bitset as written in this patch:
//   * set() is declared to return uint8_t but contains no return statement.
//   * set() prints "v %d sf %d\n" to stderr on every call.
//   * set() combines the new value with `&=`; vmask[i & 3] has the entry's two
//     bits cleared and all other bits set, so the stored entry becomes
//     (old entry & v). The trailing assert(get(i) == v) therefore only holds
//     when the old entry already had every bit of v set.
//   * resize() starts with assert(one_initialized_). The sized constructor
//     defaults one_initialized to false and calls resize(), and the default
//     constructor also stores false, so any later resize() hits the same assert.
//   * resize() allocates and memsets (size << 2) bytes, while get()/set() index
//     byte (i >> 2).
//   * resize() reads new_data[0], data_[0] and get(0) in asserts with no check
//     that size is non-zero.
//   * resize() copies all size_ old entries into the new buffer whatever the
//     new size is, and its loop compares an int index against uint32_t size_.
//   * The class releases data_ with delete[] in its destructor and declares no
//     copy constructor or copy assignment.
//   * NOTE(review): the bare "#include" lines and "std::numeric_limits::max()"
//     have no <...> argument, and "std::vector present_" has no element type --
//     presumably angle-bracket text was lost when this diff was extracted;
//     confirm against the original commit.
diff --git a/cxxmph/Makefile.am b/cxxmph/Makefile.am index 22c0bb2..db8ffa1 100644 --- a/cxxmph/Makefile.am +++ b/cxxmph/Makefile.am @@ -1,5 +1,5 @@ TESTS = $(check_PROGRAMS) -check_PROGRAMS = hollow_iterator_test mph_map_test mph_index_test trigraph_test +check_PROGRAMS = mph_bits_test hollow_iterator_test mph_map_test mph_index_test trigraph_test noinst_PROGRAMS = bm_index bm_map bin_PROGRAMS = cxxmph lib_LTLIBRARIES = libcxxmph.la @@ -27,4 +27,6 @@ cxxmph_LDADD = libcxxmph.la cxxmph_SOURCES = cxxmph.cc hollow_iterator_test_SOURCES = hollow_iterator_test.cc +mph_bits_test_SOURCES = mph_bits_test.cc +mph_bits_test_LDADD = libcxxmph.la diff --git a/cxxmph/mph_bits.cc b/cxxmph/mph_bits.cc index 9fb97bd..510572c 100644 --- a/cxxmph/mph_bits.cc +++ b/cxxmph/mph_bits.cc @@ -1,4 +1,7 @@ #include "mph_bits.h" namespace cxxmph { + +const uint8_t dynamic_2bitset::vmask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; + } diff --git a/cxxmph/mph_bits.h b/cxxmph/mph_bits.h index 6de8168..7dcf0be 100644 --- a/cxxmph/mph_bits.h +++ b/cxxmph/mph_bits.h @@ -2,13 +2,63 @@ #define __CXXMPH_MPH_BITS_H__ #include // for uint32_t and friends +#include #include +#include +#include +#include +#include namespace cxxmph { -static const uint8_t valuemask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; +class dynamic_2bitset { + public: + dynamic_2bitset() : data_(NULL), size_(0), one_initialized_(false) {} + dynamic_2bitset(uint32_t size, bool one_initialized = false) + : data_(NULL), size_(0), one_initialized_(one_initialized) { + resize(size); + } + ~dynamic_2bitset() { delete [] data_; } + + const uint8_t operator[](uint32_t i) const { return get(i); } + uint8_t get(uint32_t i) const { + return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3); + } + uint8_t set(uint32_t i, uint8_t v) { + uint8_t sf = ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); + fprintf(stderr, "v %d sf %d\n", v, sf); + data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); + assert(get(i) == v); + } + void resize(uint32_t 
size) { + uint8_t* new_data = new uint8_t[size << 2]; + assert(one_initialized_); + assert(one_initialized_ * ones() == ones()); + memset(new_data, one_initialized_*ones(), size << 2); + assert(new_data[0] == ones()); + uint8_t* old_data_ = data_; + for (int i = 0; i < size_; ++i) { + data_ = old_data_; + auto v = get(i); + data_ = new_data; + set(i, v); + } + size_ = size; + delete [] old_data_; + data_ = new_data; + assert(data_[0] == ones()); + assert(get(0) == 3); + } + static const uint8_t vmask[]; + private: + uint8_t* data_; + uint32_t size_; + bool one_initialized_; + uint8_t ones() { return std::numeric_limits::max(); } +}; + static void set_2bit_value(uint8_t *d, uint32_t i, uint8_t v) { - d[(i >> 2)] &= ((v << ((i & 3) << 1)) | valuemask[i & 3]); + d[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); } static uint32_t get_2bit_value(const uint8_t* d, uint32_t i) { return (d[(i >> 2)] >> (((i & 3) << 1)) & 3); diff --git a/cxxmph/mph_map.h b/cxxmph/mph_map.h index 6886cb3..caddf12 100644 --- a/cxxmph/mph_map.h +++ b/cxxmph/mph_map.h @@ -126,7 +126,7 @@ class mph_map { std::vector present_; std::vector nests_; SimpleMPHIndex::hash_function> index_; - // TODO(davi) optimize slack to no hold a copy of the key + // TODO(davi) optimize slack to hold 128 unique bits from hash64 as key typedef unordered_map slack_type; slack_type slack_; size_type size_;