From e760465fca0e494e20815c2e74bd64f6971da13c Mon Sep 17 00:00:00 2001 From: Davi Reis Date: Mon, 19 Mar 2012 22:48:11 -0300 Subject: [PATCH] Some comments. --- cxxmph/mph_bits.h | 2 ++ cxxmph/mph_index.h | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cxxmph/mph_bits.h b/cxxmph/mph_bits.h index 586e42b..c9eaabb 100644 --- a/cxxmph/mph_bits.h +++ b/cxxmph/mph_bits.h @@ -67,6 +67,8 @@ static uint32_t nextpoweroftwo(uint32_t k) { // http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord // Fast a % (k*2^t) // http://www.azillionmonkeys.com/qed/adiv.html +// rank and select: +// http://vigna.dsi.unimi.it/ftp/papers/Broadword.pdf } // namespace cxxmph diff --git a/cxxmph/mph_index.h b/cxxmph/mph_index.h index 17ad3e5..2a217bc 100644 --- a/cxxmph/mph_index.h +++ b/cxxmph/mph_index.h @@ -15,6 +15,10 @@ // traditional hash function over a key and doing 2-3 conflict resolutions on // 100byte-ish strings. // +// Thesis presenting this and similar algorithms: +// http://homepages.dcc.ufmg.br/~fbotelho/en/talks/thesis2008/thesis.pdf +// +// // Notes: // // Most users can use the SimpleMPHIndex wrapper instead of the MPHIndex which @@ -80,7 +84,11 @@ class MPHIndex { uint32_t Rank(uint32_t vertex) const; // Algorithm parameters - double c_; // Number of bits per key (? is it right) + // Perfect hash function density. If this were a 2-graph, + // then the probability of having an acyclic graph would be + // sqrt(1-(2/c)^2). See section 3 of the paper below for details. + // http://www.it-c.dk/people/pagh/papers/simpleperf.pdf + double c_; uint8_t b_; // Number of bits of the kth index in the ranktable // Values used during generation