Documentation updated for release 0.9

This commit is contained in:
Fabiano C. Botelho
2009-06-12 21:49:26 -03:00
parent b8aa2106e9
commit 088389184f
274 changed files with 2951 additions and 217 deletions

140
tex/bdz/bdz.bib Executable file
View File

@@ -0,0 +1,140 @@
@inproceedings{bpz07,
author = {F.C. Botelho and R. Pagh and N. Ziviani},
title = {Simple and Space-Efficient Minimal Perfect Hash Functions},
booktitle = {Proceedings of the 10th Workshop on Algorithms and Data Structures (WADs'07)},
publisher = {Springer LNCS vol. 4619},
pages = {139-150},
month = aug,
location = {Halifax, Canada},
year = 2007,
key = {author}
}
@PhdThesis{b08,
author = {F. C. Botelho},
title = {Near-Optimal Space Perfect Hashing Algorithms},
school = {Federal University of Minas Gerais},
year = {2008},
OPTkey = {},
OPTtype = {},
OPTaddress = {},
month = {September},
note = {Supervised by Nivio Ziviani, \url{http://www.dcc.ufmg.br/pos/cursos/defesas/255D.PDF}},
OPTannote = {},
OPTurl = {http://www.dcc.ufmg.br/pos/cursos/defesas/255D.PDF},
OPTdoi = {},
OPTissn = {},
OPTlocalfile = {},
OPTabstract = {}
}
@Article{mwhc96,
author = {B.S. Majewski and N.C. Wormald and G. Havas and Z.J. Czech},
title = {A family of perfect hashing methods},
journal = {The Computer Journal},
year = {1996},
volume = {39},
number = {6},
pages = {547-554},
key = {author}
}
@inproceedings{ckrt04,
author = {B. Chazelle and J. Kilian and R. Rubinfeld and A. Tal},
title = {The Bloomier Filter: An Efficient Data Structure for Static Support Lookup Tables},
booktitle = {Proceedings of the 15th annual ACM-SIAM symposium on Discrete algorithms (SODA'04)},
year = {2004},
isbn = {0-89871-558-X},
pages = {30--39},
location = {New Orleans, Louisiana},
publisher = {Society for Industrial and Applied Mathematics},
address = {Philadelphia, PA, USA},
optpublisher = {Society for Industrial and Applied Mathematics}
}
@Article{j97,
author = {B. Jenkins},
title = {Algorithm Alley: Hash Functions},
journal = {Dr. Dobb's Journal of Software Tools},
volume = {22},
number = {9},
month = {september},
year = {1997},
note = {Extended version available at \url{http://burtleburtle.net/bob/hash/doobs.html}}
}
@Article{e87,
author = {J. Ebert},
title = {A Versatile Data Structure for Edge-Oriented Graph Algorithms},
journal = {Communications of the ACM},
year = {1987},
OPTkey = {},
OPTvolume = {},
volume = {30},
number = {6},
pages = {513-519},
OPTmonth = {},
OPTnote = {},
OPTannote = {}
}
@article {dict-jour,
AUTHOR = {R. Pagh},
TITLE = {Low Redundancy in Static Dictionaries with Constant Query Time},
OPTJOURNAL = {SIAM J. Comput.},
JOURNAL = {SIAM Journal on Computing},
VOLUME = {31},
YEAR = {2001},
NUMBER = {2},
PAGES = {353--363},
}
@inproceedings{sg06,
author = {K. Sadakane and R. Grossi},
title = {Squeezing succinct data structures into entropy bounds},
booktitle = {Proceedings of the 17th annual ACM-SIAM symposium on Discrete algorithms (SODA'06)},
year = {2006},
pages = {1230--1239}
}
@inproceedings{gn06,
author = {R. Gonzalez and
G. Navarro},
title = {Statistical Encoding of Succinct Data Structures},
booktitle = {Proceedings of the 17th Annual Symposium on Combinatorial Pattern Matching (CPM'06)},
year = {2006},
pages = {294--305}
}
@inproceedings{fn07,
author = {K. Fredriksson and
F. Nikitin},
title = {Simple Compression Code Supporting Random Access and Fast
String Matching},
booktitle = {Proceedings of the 6th International Workshop on Efficient and Experimental Algorithms (WEA'07)},
year = {2007},
pages = {203--216}
}
@inproceedings{os07,
author = {D. Okanohara and K. Sadakane},
title = {Practical Entropy-Compressed Rank/Select Dictionary},
booktitle = {Proceedings of the Workshop on Algorithm Engineering and
Experiments (ALENEX'07)},
year = {2007},
location = {New Orleans, Louisiana, USA}
}
@inproceedings{rrr02,
author = {R. Raman and V. Raman and S. S. Rao},
title = {Succinct indexable dictionaries with applications to encoding k-ary trees and multisets},
booktitle = {Proceedings of the thirteenth annual ACM-SIAM symposium on Discrete algorithms (SODA'02)},
year = {2002},
isbn = {0-89871-513-X},
pages = {233--242},
location = {San Francisco, California},
publisher = {Society for Industrial and Applied Mathematics},
address = {Philadelphia, PA, USA},
}

70
tex/bdz/bdz.tex Executable file
View File

@@ -0,0 +1,70 @@
\documentclass[12pt]{article}
\usepackage{graphicx}
\usepackage{latexsym}
\usepackage{url}
\usepackage{a4wide}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{fancyhdr}
\usepackage{graphics}
\usepackage{multicol}
\usepackage{epsfig}
\usepackage{textcomp}
\usepackage{url}
% \usepackage{subfigure}
% \usepackage{subfig}
% \usepackage{wrapfig}
\bibliographystyle{plain}
% \bibliographystyle{sbc}
% \bibliographystyle{abnt-alf}
% \bibliographystyle{abnt-num}
\begin{document}
\sloppy
% \renewcommand{\baselinestretch}{1.24}\normalsize % set the space between lines to 1.24
% set headings
% \pagestyle{fancy}
% \lhead[\fancyplain{}{\footnotesize\thepage}]
% {\fancyplain{}{\footnotesize\rightmark}}
% \rhead[\fancyplain{}{\footnotesize\leftmark}]
% {\fancyplain{}{\footnotesize\thepage}}
%
% \cfoot{}
\lstset{
language=C,
basicstyle=\fontsize{8}{8}\selectfont,
captionpos=t,
aboveskip=0mm,
belowskip=0mm,
abovecaptionskip=0.5mm,
belowcaptionskip=0.5mm,
% numbers = left,
mathescape=true,
escapechar=@,
extendedchars=true,
showstringspaces=false,
% columns=fixed,
basewidth=0.515em,
frame=single,
framesep=1mm,
xleftmargin=1mm,
xrightmargin=1mm,
framerule=0pt
}
\include{introduction} % Introduction
\bibliography{bdz}
\end{document}

View File

@@ -0,0 +1,783 @@
%!PS-Adobe-2.0 EPSF-2.0
%%Title: overviewinternal3g.fig
%%Creator: fig2dev Version 3.2 Patchlevel 5
%%CreationDate: Fri May 29 11:09:04 2009
%%For: fbotelho@fbotelho-laptop (Fabiano C. Botelho,,,)
%%BoundingBox: 0 0 342 128
%Magnification: 1.0000
%%EndComments
%%BeginProlog
/MyAppDict 100 dict dup begin def
/$F2psDict 200 dict def
$F2psDict begin
$F2psDict /mtrx matrix put
/col-1 {0 setgray} bind def
/col0 {0.000 0.000 0.000 srgb} bind def
/col1 {0.000 0.000 1.000 srgb} bind def
/col2 {0.000 1.000 0.000 srgb} bind def
/col3 {0.000 1.000 1.000 srgb} bind def
/col4 {1.000 0.000 0.000 srgb} bind def
/col5 {1.000 0.000 1.000 srgb} bind def
/col6 {1.000 1.000 0.000 srgb} bind def
/col7 {1.000 1.000 1.000 srgb} bind def
/col8 {0.000 0.000 0.560 srgb} bind def
/col9 {0.000 0.000 0.690 srgb} bind def
/col10 {0.000 0.000 0.820 srgb} bind def
/col11 {0.530 0.810 1.000 srgb} bind def
/col12 {0.000 0.560 0.000 srgb} bind def
/col13 {0.000 0.690 0.000 srgb} bind def
/col14 {0.000 0.820 0.000 srgb} bind def
/col15 {0.000 0.560 0.560 srgb} bind def
/col16 {0.000 0.690 0.690 srgb} bind def
/col17 {0.000 0.820 0.820 srgb} bind def
/col18 {0.560 0.000 0.000 srgb} bind def
/col19 {0.690 0.000 0.000 srgb} bind def
/col20 {0.820 0.000 0.000 srgb} bind def
/col21 {0.560 0.000 0.560 srgb} bind def
/col22 {0.690 0.000 0.690 srgb} bind def
/col23 {0.820 0.000 0.820 srgb} bind def
/col24 {0.500 0.190 0.000 srgb} bind def
/col25 {0.630 0.250 0.000 srgb} bind def
/col26 {0.750 0.380 0.000 srgb} bind def
/col27 {1.000 0.500 0.500 srgb} bind def
/col28 {1.000 0.630 0.630 srgb} bind def
/col29 {1.000 0.750 0.750 srgb} bind def
/col30 {1.000 0.880 0.880 srgb} bind def
/col31 {1.000 0.840 0.000 srgb} bind def
end
% This junk string is used by the show operators
/PATsstr 1 string def
/PATawidthshow { % cx cy cchar rx ry string
% Loop over each character in the string
{ % cx cy cchar rx ry char
% Show the character
dup % cx cy cchar rx ry char char
PATsstr dup 0 4 -1 roll put % cx cy cchar rx ry char (char)
false charpath % cx cy cchar rx ry char
/clip load PATdraw
% Move past the character (charpath modified the
% current point)
currentpoint % cx cy cchar rx ry char x y
newpath
moveto % cx cy cchar rx ry char
% Reposition by cx,cy if the character in the string is cchar
3 index eq { % cx cy cchar rx ry
4 index 4 index rmoveto
} if
% Reposition all characters by rx ry
2 copy rmoveto % cx cy cchar rx ry
} forall
pop pop pop pop pop % -
currentpoint
newpath
moveto
} bind def
/PATcg {
7 dict dup begin
/lw currentlinewidth def
/lc currentlinecap def
/lj currentlinejoin def
/ml currentmiterlimit def
/ds [ currentdash ] def
/cc [ currentrgbcolor ] def
/cm matrix currentmatrix def
end
} bind def
% PATdraw - calculates the boundaries of the object and
% fills it with the current pattern
/PATdraw { % proc
save exch
PATpcalc % proc nw nh px py
5 -1 roll exec % nw nh px py
newpath
PATfill % -
restore
} bind def
% PATfill - performs the tiling for the shape
/PATfill { % nw nh px py PATfill -
PATDict /CurrentPattern get dup begin
setfont
% Set the coordinate system to Pattern Space
PatternGState PATsg
% Set the color for uncolored patterns
PaintType 2 eq { PATDict /PColor get PATsc } if
% Create the string for showing
3 index string % nw nh px py str
% Loop for each of the pattern sources
0 1 Multi 1 sub { % nw nh px py str source
% Move to the starting location
3 index 3 index % nw nh px py str source px py
moveto % nw nh px py str source
% For multiple sources, set the appropriate color
Multi 1 ne { dup PC exch get PATsc } if
% Set the appropriate string for the source
0 1 7 index 1 sub { 2 index exch 2 index put } for pop
% Loop over the number of vertical cells
3 index % nw nh px py str nh
{ % nw nh px py str
currentpoint % nw nh px py str cx cy
2 index oldshow % nw nh px py str cx cy
YStep add moveto % nw nh px py str
} repeat % nw nh px py str
} for
5 { pop } repeat
end
} bind def
% PATkshow - kshow with the current pattern
% Each character of the string is converted to a path with charpath and
% painted through PATdraw; between consecutive characters the user proc
% is executed with the two neighbouring character codes on the stack.
/PATkshow { % proc string
exch bind % string proc
1 index 0 get % string proc char
% Loop over all but the last character in the string
0 1 4 index length 2 sub {
% string proc char idx
% Find the n+1th character in the string
3 index exch 1 add get % string proc char char+1
exch 2 copy % string proc char+1 char char+1 char
% Now show the nth character
PATsstr dup 0 4 -1 roll put % string proc chr+1 chr chr+1 (chr)
false charpath % string proc char+1 char char+1
/clip load PATdraw
% Move past the character (charpath modified the current point)
currentpoint newpath moveto
% Execute the user proc (should consume char and char+1)
mark 3 1 roll % string proc char+1 mark char char+1
4 index exec % string proc char+1 mark...
cleartomark % string proc char+1
} for
% Now display the last character
PATsstr dup 0 4 -1 roll put % string proc (char+1)
false charpath % string proc
/clip load PATdraw
% Discard the character outline left in the current path
newpath
pop pop % -
} bind def
% PATmp - the makepattern equivalent
/PATmp { % patdict patmtx PATmp patinstance
exch dup length 7 add % We will add 6 new entries plus 1 FID
dict copy % Create a new dictionary
begin
% Matrix to install when painting the pattern
TilingType PATtcalc
/PatternGState PATcg def
PatternGState /cm 3 -1 roll put
% Check for multi pattern sources (Level 1 fast color patterns)
currentdict /Multi known not { /Multi 1 def } if
% Font dictionary definitions
/FontType 3 def
% Create a dummy encoding vector
/Encoding 256 array def
3 string 0 1 255 {
Encoding exch dup 3 index cvs cvn put } for pop
/FontMatrix matrix def
/FontBBox BBox def
/BuildChar {
mark 3 1 roll % mark dict char
exch begin
Multi 1 ne {PaintData exch get}{pop} ifelse % mark [paintdata]
PaintType 2 eq Multi 1 ne or
{ XStep 0 FontBBox aload pop setcachedevice }
{ XStep 0 setcharwidth } ifelse
currentdict % mark [paintdata] dict
/PaintProc load % mark [paintdata] dict paintproc
end
gsave
false PATredef exec true PATredef
grestore
cleartomark % -
} bind def
currentdict
end % newdict
/foo exch % /foo newdict
definefont % newfont
} bind def
% PATpcalc - calculates the starting point and width/height
% of the tile fill for the shape
/PATpcalc { % - PATpcalc nw nh px py
PATDict /CurrentPattern get begin
gsave
% Set up the coordinate system to Pattern Space
% and lock down pattern
PatternGState /cm get setmatrix
BBox aload pop pop pop translate
% Determine the bounding box of the shape
pathbbox % llx lly urx ury
grestore
% Determine (nw, nh) the # of cells to paint width and height
PatHeight div ceiling % llx lly urx qh
4 1 roll % qh llx lly urx
PatWidth div ceiling % qh llx lly qw
4 1 roll % qw qh llx lly
PatHeight div floor % qw qh llx ph
4 1 roll % ph qw qh llx
PatWidth div floor % ph qw qh pw
4 1 roll % pw ph qw qh
2 index sub cvi abs % pw ph qw qh-ph
exch 3 index sub cvi abs exch % pw ph nw=qw-pw nh=qh-ph
% Determine the starting point of the pattern fill
%(px, py)
4 2 roll % nw nh pw ph
PatHeight mul % nw nh pw py
exch % nw nh py pw
PatWidth mul exch % nw nh px py
end
} bind def
% Save the original routines so that we can use them later on
/oldfill /fill load def
/oldeofill /eofill load def
/oldstroke /stroke load def
/oldshow /show load def
/oldashow /ashow load def
/oldwidthshow /widthshow load def
/oldawidthshow /awidthshow load def
/oldkshow /kshow load def
% These defs are necessary so that subsequent procs don't bind in
% the originals
/fill { oldfill } bind def
/eofill { oldeofill } bind def
/stroke { oldstroke } bind def
/show { oldshow } bind def
/ashow { oldashow } bind def
/widthshow { oldwidthshow } bind def
/awidthshow { oldawidthshow } bind def
/kshow { oldkshow } bind def
/PATredef {
MyAppDict begin
{
/fill { /clip load PATdraw newpath } bind def
/eofill { /eoclip load PATdraw newpath } bind def
/stroke { PATstroke } bind def
/show { 0 0 null 0 0 6 -1 roll PATawidthshow } bind def
/ashow { 0 0 null 6 3 roll PATawidthshow }
bind def
/widthshow { 0 0 3 -1 roll PATawidthshow }
bind def
/awidthshow { PATawidthshow } bind def
/kshow { PATkshow } bind def
} {
/fill { oldfill } bind def
/eofill { oldeofill } bind def
/stroke { oldstroke } bind def
/show { oldshow } bind def
/ashow { oldashow } bind def
/widthshow { oldwidthshow } bind def
/awidthshow { oldawidthshow } bind def
/kshow { oldkshow } bind def
} ifelse
end
} bind def
false PATredef
% Conditionally define setcmykcolor if not available
/setcmykcolor where { pop } {
/setcmykcolor {
1 sub 4 1 roll
3 {
3 index add neg dup 0 lt { pop 0 } if 3 1 roll
} repeat
setrgbcolor - pop
} bind def
} ifelse
/PATsc { % colorarray
aload length % c1 ... cn length
dup 1 eq { pop setgray } { 3 eq { setrgbcolor } { setcmykcolor
} ifelse } ifelse
} bind def
/PATsg { % dict
begin
lw setlinewidth
lc setlinecap
lj setlinejoin
ml setmiterlimit
ds aload pop setdash
cc aload pop setrgbcolor
cm setmatrix
end
} bind def
/PATDict 3 dict def
/PATsp {
true PATredef
PATDict begin
/CurrentPattern exch def
% If it's an uncolored pattern, save the color
CurrentPattern /PaintType get 2 eq {
/PColor exch def
} if
/CColor [ currentrgbcolor ] def
end
} bind def
% PATstroke - stroke with the current pattern
/PATstroke {
countdictstack
save
mark
{
currentpoint strokepath moveto
PATpcalc % proc nw nh px py
clip newpath PATfill
} stopped {
(*** PATstroke Warning: Path is too complex, stroking
with gray) =
cleartomark
restore
countdictstack exch sub dup 0 gt
{ { end } repeat } { pop } ifelse
gsave 0.5 setgray oldstroke grestore
} { pop restore pop } ifelse
newpath
} bind def
/PATtcalc { % modmtx tilingtype PATtcalc tilematrix
% Note: tiling types 2 and 3 are not supported
gsave
exch concat % tilingtype
matrix currentmatrix exch % cmtx tilingtype
% Tiling type 1 and 3: constant spacing
2 ne {
% Distort the pattern so that it occupies
% an integral number of device pixels
dup 4 get exch dup 5 get exch % tx ty cmtx
XStep 0 dtransform
round exch round exch % tx ty cmtx dx.x dx.y
XStep div exch XStep div exch % tx ty cmtx a b
0 YStep dtransform
round exch round exch % tx ty cmtx a b dy.x dy.y
YStep div exch YStep div exch % tx ty cmtx a b c d
7 -3 roll astore % { a b c d tx ty }
} if
grestore
} bind def
/PATusp {
false PATredef
PATDict begin
CColor PATsc
end
} bind def
% crosshatch30
11 dict begin
/PaintType 1 def
/PatternType 1 def
/TilingType 1 def
/BBox [0 0 1 1] def
/XStep 1 def
/YStep 1 def
/PatWidth 1 def
/PatHeight 1 def
/Multi 2 def
/PaintData [
{ clippath } bind
{ 32 16 true [ 32 0 0 -16 0 16 ]
{<033003300c0c0c0c30033003c000c000300330030c0c0c0c
0330033000c000c0033003300c0c0c0c30033003c000c000
300330030c0c0c0c0330033000c000c0>}
imagemask } bind
] def
/PaintProc {
pop
exec fill
} def
currentdict
end
/P3 exch def
/cp {closepath} bind def
/ef {eofill} bind def
/gr {grestore} bind def
/gs {gsave} bind def
/sa {save} bind def
/rs {restore} bind def
/l {lineto} bind def
/m {moveto} bind def
/rm {rmoveto} bind def
/n {newpath} bind def
/s {stroke} bind def
/sh {show} bind def
/slc {setlinecap} bind def
/slj {setlinejoin} bind def
/slw {setlinewidth} bind def
/srgb {setrgbcolor} bind def
/rot {rotate} bind def
/sc {scale} bind def
/sd {setdash} bind def
/ff {findfont} bind def
/sf {setfont} bind def
/scf {scalefont} bind def
/sw {stringwidth} bind def
/tr {translate} bind def
/tnt {dup dup currentrgbcolor
4 -2 roll dup 1 exch sub 3 -1 roll mul add
4 -2 roll dup 1 exch sub 3 -1 roll mul add
4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
bind def
/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
4 -2 roll mul srgb} bind def
/DrawEllipse {
/endangle exch def
/startangle exch def
/yrad exch def
/xrad exch def
/y exch def
/x exch def
/savematrix mtrx currentmatrix def
x y tr xrad yrad sc 0 0 1 startangle endangle arc
closepath
savematrix setmatrix
} def
/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
/$F2psEnd {$F2psEnteredState restore end} def
/pageheader {
save
newpath 0 128 moveto 0 0 lineto 342 0 lineto 342 128 lineto closepath clip newpath
-40.3 230.6 translate
1 -1 scale
$F2psBegin
10 setmiterlimit
0 slj 0 slc
0.06299 0.06299 sc
} bind def
/pagefooter {
$F2psEnd
restore
} bind def
%%EndProlog
pageheader
%
% Fig objects follow
%
%
% here starts figure with depth 53
% Polyline
0 slj
0 slc
7.500 slw
n 757 1980 m 652 1980 652 2640 105 arcto 4 {pop} repeat
652 2745 1155 2745 105 arcto 4 {pop} repeat
1260 2745 1260 2085 105 arcto 4 {pop} repeat
1260 1980 757 1980 105 arcto 4 {pop} repeat
cp gs col0 s gr
% here ends figure;
%
% here starts figure with depth 51
% Polyline
0 slj
0 slc
7.500 slw
gs clippath
5215 2261 m 5264 2278 l 5278 2235 l 5229 2219 l 5229 2219 l 5251 2250 l 5215 2261 l cp
eoclip
n 4399 1969 m
5257 2252 l gs col7 1.00 shd ef gr gs col0 s gr gr
% arrowhead
n 5215 2261 m 5251 2250 l 5229 2219 l 5215 2261 l cp gs 0.00 setgray ef gr col0 s
% Polyline
gs clippath
5223 2432 m 5272 2449 l 5286 2406 l 5237 2390 l 5237 2390 l 5259 2421 l 5223 2432 l cp
eoclip
n 4407 2140 m
5265 2423 l gs col7 1.00 shd ef gr gs col0 s gr gr
% arrowhead
n 5223 2432 m 5259 2421 l 5237 2390 l 5223 2432 l cp gs 0.00 setgray ef gr col0 s
% Polyline
gs clippath
5216 2650 m 5267 2647 l 5264 2602 l 5213 2605 l 5213 2605 l 5245 2626 l 5216 2650 l cp
eoclip
n 4398 2687 m
5251 2626 l gs col7 1.00 shd ef gr gs col0 s gr gr
% arrowhead
n 5216 2650 m 5245 2626 l 5213 2605 l 5216 2650 l cp gs 0.00 setgray ef gr col0 s
% Polyline
n 5362 2523 m 5752 2523 l 5752 2696 l 5362 2696 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
n 5362 2165 m 5752 2165 l 5752 2338 l 5362 2338 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
0.000 slw
n 720 2070 m 900 2070 l 900 2160 l 720 2160 l
cp gs /PC [[1.00 1.00 1.00] [0.00 0.00 0.00]] def
15.00 15.00 sc P3 [16 0 0 -8 48.00 138.00] PATmp PATsp ef gr PATusp
% Polyline
n 720 2565 m 900 2565 l 900 2655 l 720 2655 l
cp gs col7 0.00 shd ef gr
% Polyline
7.500 slw
n 4245 2415 m 4425 2415 l 4425 2595 l 4245 2595 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
n 4245 2235 m 4425 2235 l 4425 2415 l 4245 2415 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
n 5362 2343 m 5752 2343 l 5752 2516 l 5362 2516 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
n 2835 3150 m 3330 3150 l 3330 3465 l 2835 3465 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
0.000 slw
n 2880 3330 m 3015 3330 l 3015 3420 l 2880 3420 l
cp gs col7 0.00 shd ef gr
% Polyline
7.500 slw
n 2340 3150 m 2835 3150 l 2835 3465 l 2340 3465 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
n 1845 3150 m 2340 3150 l 2340 3465 l 1845 3465 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
0.000 slw
n 2385 3330 m 2520 3330 l 2520 3420 l 2385 3420 l
cp gs /PC [[1.00 1.00 1.00] [0.00 0.00 0.00]] def
15.00 15.00 sc P3 [16 0 0 -8 159.00 222.00] PATmp PATsp ef gr PATusp
% Polyline
n 2602 3017 m 2605 2425 l 2792 2423 l 2788 3044 l 2588 3030 l
cp gs /PC [[1.00 1.00 1.00] [0.00 0.00 0.00]] def
15.00 15.00 sc P3 [16 0 0 -8 172.53 161.53] PATmp PATsp ef gr PATusp
% Polyline
n 2609 2477 m 2612 1885 l 2799 1883 l 2795 2504 l 2595 2490 l
cp gs /PC [[1.00 1.00 1.00] [0.00 0.00 0.00]] def
15.00 15.00 sc P3 [16 0 0 -8 173.00 125.53] PATmp PATsp ef gr PATusp
% Polyline
7.500 slw
n 4245 1890 m 4425 1890 l 4425 2070 l 4245 2070 l
cp gs col7 0.85 shd ef gr gs col0 s gr
% Polyline
n 4245 2063 m 4425 2063 l 4425 2243 l 4245 2243 l
cp gs col7 0.85 shd ef gr gs col0 s gr
% Polyline
n 4245 2595 m 4425 2595 l 4425 2775 l 4245 2775 l
cp gs col7 0.85 shd ef gr gs col0 s gr
% Polyline
n 4247 2748 m 4427 2748 l 4427 2928 l 4247 2928 l
cp gs col7 1.00 shd ef gr gs col0 s gr
% Polyline
0.000 slw
n 2657 3060 m 2111 2491 l 2244 2360 l 2786 2937 l
cp gs col7 0.00 shd ef gr
% Polyline
n 2111 2402 m 2660 1838 l 2797 1966 l 2242 2527 l
cp gs col7 0.55 shd ef gr
% Polyline
n 2115 3017 m 2118 2425 l 2305 2423 l 2301 3044 l 2101 3030 l
cp gs col7 0.55 shd ef gr
% Polyline
n 1890 3330 m 2025 3330 l 2025 3420 l 1890 3420 l
cp gs col7 0.55 shd ef gr
% Polyline
n 720 2340 m 900 2340 l 900 2430 l 720 2430 l
cp gs col7 0.55 shd ef gr
% Polyline
n 2113 2439 m 2116 1847 l 2303 1845 l 2299 2466 l 2099 2452 l
cp gs col7 0.00 shd ef gr
/Times-Italic ff 142.88 scf sf
2835 2474 m
gs 1 -1 sc (h \(x\)) col0 sh gr
/Times-Roman ff 111.13 scf sf
2916 2520 m
gs 1 -1 sc (1) col0 sh gr
/Times-Italic ff 142.88 scf sf
2835 3030 m
gs 1 -1 sc (h \(x\)) col0 sh gr
/Times-Roman ff 111.13 scf sf
2916 3076 m
gs 1 -1 sc (2) col0 sh gr
/Times-Italic ff 142.88 scf sf
2835 1950 m
gs 1 -1 sc (h \(x\)) col0 sh gr
/Times-Roman ff 111.13 scf sf
2916 1996 m
gs 1 -1 sc (0) col0 sh gr
/Times-Roman ff 142.88 scf sf
4095 2025 m
gs 1 -1 sc (0) col0 sh gr
/Times-Roman ff 142.88 scf sf
4095 2205 m
gs 1 -1 sc (1) col0 sh gr
/Times-Roman ff 142.88 scf sf
4095 2385 m
gs 1 -1 sc (2) col0 sh gr
/Times-Roman ff 142.88 scf sf
4095 2565 m
gs 1 -1 sc (3) col0 sh gr
/Times-Roman ff 142.88 scf sf
4095 2745 m
gs 1 -1 sc (4) col0 sh gr
/Times-Roman ff 142.88 scf sf
4095 2925 m
gs 1 -1 sc (5) col0 sh gr
/Times-Italic ff 142.88 scf sf
4320 1800 m
gs 1 -1 sc (g) col0 sh gr
/Times-Roman ff 142.88 scf sf
5220 2115 m
gs 1 -1 sc (Hash Table ) col0 sh gr
/Times-Roman ff 142.88 scf sf
5265 2475 m
gs 1 -1 sc (1) col0 sh gr
/Times-Roman ff 142.88 scf sf
5265 2655 m
gs 1 -1 sc (2) col0 sh gr
/Times-Roman ff 142.88 scf sf
5265 2295 m
gs 1 -1 sc (0) col0 sh gr
/Times-Roman ff 158.75 scf sf
1575 1755 m
gs 1 -1 sc (\(a\)) col0 sh gr
/Times-Roman ff 158.75 scf sf
3465 1755 m
gs 1 -1 sc (\(b\)) col0 sh gr
/Times-Roman ff 158.75 scf sf
4680 1755 m
gs 1 -1 sc (\(c\)) col0 sh gr
/Times-Roman ff 142.88 scf sf
3015 3645 m
gs 1 -1 sc (2) col0 sh gr
/Times-Roman ff 142.88 scf sf
2565 3645 m
gs 1 -1 sc (1) col0 sh gr
/Times-Roman ff 142.88 scf sf
2070 3645 m
gs 1 -1 sc (0) col0 sh gr
/ZapfChancery-MediumItalic ff 174.63 scf sf
3420 3375 m
gs 1 -1 sc (L) col0 sh gr
/Times-Roman ff 142.88 scf sf
2865 3277 m
gs 1 -1 sc ({0,2,5}) col0 sh gr
/Times-Roman ff 142.88 scf sf
2370 3277 m
gs 1 -1 sc ({1,3,5}) col0 sh gr
/Times-Roman ff 142.88 scf sf
1895 3277 m
gs 1 -1 sc ({1,2,4}) col0 sh gr
% here ends figure;
%
% here starts figure with depth 45
% Polyline
0 slj
0 slc
7.500 slw
gs clippath
1944 2497 m 1995 2497 l 1995 2452 l 1944 2452 l 1944 2452 l 1974 2475 l 1944 2497 l cp
eoclip
n 1357 2475 m
1980 2475 l gs col7 1.00 shd ef gr gs col0 s gr gr
% arrowhead
n 1944 2497 m 1974 2475 l 1944 2452 l 1944 2497 l cp gs 0.00 setgray ef gr col0 s
% Polyline
gs clippath
3879 2497 m 3930 2497 l 3930 2452 l 3879 2452 l 3879 2452 l 3909 2475 l 3879 2497 l cp
eoclip
n 3292 2475 m
3915 2475 l gs col7 1.00 shd ef gr gs col0 s gr gr
% arrowhead
n 3879 2497 m 3909 2475 l 3879 2452 l 3879 2497 l cp gs 0.00 setgray ef gr col0 s
% Ellipse
n 2704 2448 101 101 0 360 DrawEllipse gs col7 1.00 shd ef gr gs col0 s gr
% Ellipse
n 2209 2448 101 101 0 360 DrawEllipse gs col7 1.00 shd ef gr gs col0 s gr
% Ellipse
n 2704 2988 101 101 0 360 DrawEllipse gs col7 1.00 shd ef gr gs col0 s gr
% Ellipse
n 2209 1908 101 101 0 360 DrawEllipse gs col7 1.00 shd ef gr gs col0 s gr
% Ellipse
n 2704 1908 101 101 0 360 DrawEllipse gs col7 1.00 shd ef gr gs col0 s gr
% Ellipse
n 2209 2988 101 101 0 360 DrawEllipse gs col7 1.00 shd ef gr gs col0 s gr
/Times-Roman ff 142.88 scf sf
5423 2663 m
gs 1 -1 sc (band) col0 sh gr
/Times-Roman ff 142.88 scf sf
5460 2304 m
gs 1 -1 sc (the) col0 sh gr
/Times-Roman ff 142.88 scf sf
1418 2430 m
gs 1 -1 sc (Mapping) col0 sh gr
/Times-Roman ff 142.88 scf sf
3285 2430 m
gs 1 -1 sc (Assigning) col0 sh gr
/Times-Roman ff 142.88 scf sf
2674 2485 m
gs 1 -1 sc (3) col0 sh gr
/Times-Roman ff 142.88 scf sf
2179 2485 m
gs 1 -1 sc (2) col0 sh gr
/Times-Italic ff 142.88 scf sf
945 1935 m
gs 1 -1 sc (S) col0 sh gr
/Times-Roman ff 142.88 scf sf
967 2160 m
gs 1 -1 sc (who) col0 sh gr
/Times-Roman ff 142.88 scf sf
960 2430 m
gs 1 -1 sc (band) col0 sh gr
/Times-Roman ff 142.88 scf sf
1005 2655 m
gs 1 -1 sc (the) col0 sh gr
/Times-Roman ff 142.88 scf sf
4305 2378 m
gs 1 -1 sc (3) col0 sh gr
/Times-Roman ff 142.88 scf sf
5422 2482 m
gs 1 -1 sc (who) col0 sh gr
/Times-Roman ff 142.88 scf sf
4545 2430 m
gs 1 -1 sc (Ranking) col0 sh gr
/Times-Roman ff 142.88 scf sf
3060 3420 m
gs 1 -1 sc (the) col0 sh gr
/Times-Roman ff 142.88 scf sf
2539 3420 m
gs 1 -1 sc (who) col0 sh gr
/Times-Roman ff 142.88 scf sf
2045 3420 m
gs 1 -1 sc (band) col0 sh gr
/Times-Roman ff 142.88 scf sf
2179 1945 m
gs 1 -1 sc (0) col0 sh gr
/Times-Roman ff 142.88 scf sf
2674 1945 m
gs 1 -1 sc (1) col0 sh gr
/Times-Roman ff 142.88 scf sf
2179 3025 m
gs 1 -1 sc (4) col0 sh gr
/Times-Roman ff 142.88 scf sf
2674 3025 m
gs 1 -1 sc (5) col0 sh gr
/Times-Roman ff 142.88 scf sf
4300 2875 m
gs 1 -1 sc (3) col0 sh gr
/Times-Roman ff 142.88 scf sf
4305 2548 m
gs 1 -1 sc (3) col0 sh gr
/Times-Roman ff 142.88 scf sf
4305 2715 m
gs 1 -1 sc (2) col0 sh gr
/Times-Roman ff 142.88 scf sf
4299 2190 m
gs 1 -1 sc (0) col0 sh gr
/Times-Roman ff 142.88 scf sf
4299 2033 m
gs 1 -1 sc (0) col0 sh gr
% here ends figure;
pagefooter
showpage
%%Trailer
end
%EOF

View File

@@ -0,0 +1,156 @@
#FIG 3.2 Produced by xfig version 3.2.5
Landscape
Center
Metric
A4
100.00
Single
-2
1200 2
6 5355 2520 5760 2700
6 5400 2520 5715 2700
4 0 0 45 -1 0 9 0.0000 4 105 285 5423 2663 band\001
-6
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
5362 2523 5752 2523 5752 2696 5362 2696 5362 2523
-6
6 5355 2162 5760 2342
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
5362 2165 5752 2165 5752 2338 5362 2338 5362 2165
4 0 0 45 -1 0 9 0.0000 4 105 195 5460 2304 the\001
-6
6 1350 2340 1980 2520
6 1350 2340 1980 2520
2 1 0 1 0 7 45 -1 20 0.000 0 0 7 1 0 2
1 1 1.00 45.00 30.00
1357 2475 1980 2475
4 0 0 45 -1 0 9 0.0000 4 135 555 1418 2430 Mapping\001
-6
-6
6 3285 2340 3915 2520
6 3285 2340 3915 2520
2 1 0 1 0 7 45 -1 20 0.000 0 0 7 1 0 2
1 1 1.00 45.00 30.00
3292 2475 3915 2475
4 0 0 45 -1 0 9 0.0000 4 135 630 3285 2430 Assigning\001
-6
-6
6 2603 2347 2805 2549
1 3 0 1 0 7 45 -1 20 0.000 1 0.0000 2704 2448 101 101 2704 2448 2749 2538
4 0 0 45 -1 0 9 0.0000 4 105 75 2674 2485 3\001
-6
6 2108 2347 2310 2549
1 3 0 1 0 7 45 -1 20 0.000 1 0.0000 2209 2448 101 101 2209 2448 2254 2538
4 0 0 45 -1 0 9 0.0000 4 105 75 2179 2485 2\001
-6
6 2835 2340 3150 2520
4 0 0 50 -1 1 9 0.0000 4 135 300 2835 2474 h (x)\001
4 0 0 50 -1 0 7 0.0000 4 75 60 2916 2520 1\001
-6
6 2835 2925 3150 3105
4 0 0 50 -1 1 9 0.0000 4 135 300 2835 3030 h (x)\001
4 0 0 50 -1 0 7 0.0000 4 75 60 2916 3076 2\001
-6
6 2835 1845 3135 1996
4 0 0 50 -1 1 9 0.0000 4 135 300 2835 1950 h (x)\001
4 0 0 50 -1 0 7 0.0000 4 75 60 2916 1996 0\001
-6
1 3 0 1 0 7 45 -1 20 0.000 1 0.0000 2704 2988 101 101 2704 2988 2749 3078
1 3 0 1 0 7 45 -1 20 0.000 1 0.0000 2209 1908 101 101 2209 1908 2254 1998
1 3 0 1 0 7 45 -1 20 0.000 1 0.0000 2704 1908 101 101 2704 1908 2749 1998
1 3 0 1 0 7 45 -1 20 0.000 1 0.0000 2209 2988 101 101 2209 2988 2254 3078
2 4 0 1 0 7 53 -1 -1 0.000 0 0 7 0 0 5
1260 2745 1260 1980 652 1980 652 2745 1260 2745
2 2 0 0 0 7 50 -1 43 0.000 0 0 -1 0 0 5
720 2070 900 2070 900 2160 720 2160 720 2070
2 2 0 0 0 7 50 -1 0 0.000 0 0 7 0 0 5
720 2565 900 2565 900 2655 720 2655 720 2565
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
4245 2415 4425 2415 4425 2595 4245 2595 4245 2415
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
4245 2235 4425 2235 4425 2415 4245 2415 4245 2235
2 1 0 1 0 7 51 -1 20 0.000 0 0 7 1 0 2
1 1 1.00 45.00 30.00
4399 1969 5257 2252
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
5362 2343 5752 2343 5752 2516 5362 2516 5362 2343
2 1 0 1 0 7 51 -1 20 0.000 0 0 7 1 0 2
1 1 1.00 45.00 30.00
4407 2140 5265 2423
2 1 0 1 0 7 51 -1 20 0.000 0 0 7 1 0 2
1 1 1.00 45.00 30.00
4398 2687 5251 2626
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
2835 3150 3330 3150 3330 3465 2835 3465 2835 3150
2 2 0 0 0 7 50 -1 0 0.000 0 0 7 0 0 5
2880 3330 3015 3330 3015 3420 2880 3420 2880 3330
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
2340 3150 2835 3150 2835 3465 2340 3465 2340 3150
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
1845 3150 2340 3150 2340 3465 1845 3465 1845 3150
2 2 0 0 0 7 50 -1 43 0.000 0 0 7 0 0 5
2385 3330 2520 3330 2520 3420 2385 3420 2385 3330
2 3 0 0 0 7 50 -1 43 0.000 0 0 7 0 0 6
2602 3017 2605 2425 2792 2423 2788 3044 2588 3030 2602 3017
2 3 0 0 0 7 50 -1 43 0.000 0 0 7 0 0 6
2609 2477 2612 1885 2799 1883 2795 2504 2595 2490 2609 2477
2 2 0 1 0 7 50 -1 17 0.000 0 0 7 0 0 5
4245 1890 4425 1890 4425 2070 4245 2070 4245 1890
2 2 0 1 0 7 50 -1 17 0.000 0 0 7 0 0 5
4245 2063 4425 2063 4425 2243 4245 2243 4245 2063
2 2 0 1 0 7 50 -1 17 0.000 0 0 7 0 0 5
4245 2595 4425 2595 4425 2775 4245 2775 4245 2595
2 2 0 1 0 7 50 -1 20 0.000 0 0 7 0 0 5
4247 2748 4427 2748 4427 2928 4247 2928 4247 2748
2 3 0 0 0 7 50 -1 0 0.000 0 0 7 0 0 5
2657 3060 2111 2491 2244 2360 2786 2937 2657 3060
2 3 0 0 0 7 50 -1 11 0.000 0 0 7 0 0 5
2111 2402 2660 1838 2797 1966 2242 2527 2111 2402
2 3 0 0 0 7 50 -1 11 0.000 0 0 7 0 0 6
2115 3017 2118 2425 2305 2423 2301 3044 2101 3030 2115 3017
2 2 0 0 0 7 50 -1 11 0.000 0 0 7 0 0 5
1890 3330 2025 3330 2025 3420 1890 3420 1890 3330
2 2 0 0 0 7 50 -1 11 0.000 0 0 7 0 0 5
720 2340 900 2340 900 2430 720 2430 720 2340
2 3 0 0 0 7 50 -1 0 0.000 0 0 7 0 0 6
2113 2439 2116 1847 2303 1845 2299 2466 2099 2452 2113 2439
4 0 0 45 -1 1 9 0.0000 4 105 75 945 1935 S\001
4 0 0 45 -1 0 9 0.0000 4 105 270 967 2160 who\001
4 0 0 45 -1 0 9 0.0000 4 105 285 960 2430 band\001
4 0 0 45 -1 0 9 0.0000 4 105 195 1005 2655 the\001
4 0 0 50 -1 0 9 0.0000 4 105 75 4095 2025 0\001
4 0 0 50 -1 0 9 0.0000 4 105 75 4095 2205 1\001
4 0 0 50 -1 0 9 0.0000 4 105 75 4095 2385 2\001
4 0 0 50 -1 0 9 0.0000 4 105 75 4095 2565 3\001
4 0 0 50 -1 0 9 0.0000 4 105 75 4095 2745 4\001
4 0 0 50 -1 0 9 0.0000 4 105 75 4095 2925 5\001
4 0 0 50 -1 1 9 0.0000 4 120 60 4320 1800 g\001
4 0 0 45 -1 0 9 0.0000 4 105 75 4305 2378 3\001
4 0 0 50 -1 0 9 0.0000 4 105 810 5220 2115 Hash Table \001
4 0 0 45 -1 0 9 0.0000 4 105 270 5422 2482 who\001
4 0 0 50 -1 0 9 0.0000 4 105 75 5265 2475 1\001
4 0 0 50 -1 0 9 0.0000 4 105 75 5265 2655 2\001
4 0 0 50 -1 0 9 0.0000 4 105 75 5265 2295 0\001
4 0 0 50 -1 0 10 0.0000 4 135 180 1575 1755 (a)\001
4 0 0 50 -1 0 10 0.0000 4 135 195 3465 1755 (b)\001
4 0 0 50 -1 0 10 0.0000 4 135 180 4680 1755 (c)\001
4 0 0 45 -1 0 9 0.0000 4 135 510 4545 2430 Ranking\001
4 0 0 50 -1 0 9 0.0000 4 105 75 3015 3645 2\001
4 0 0 50 -1 0 9 0.0000 4 105 75 2565 3645 1\001
4 0 0 50 -1 0 9 0.0000 4 105 75 2070 3645 0\001
4 0 0 50 -1 33 11 0.0000 4 135 90 3420 3375 L\001
4 0 0 50 -1 0 9 0.0000 4 135 435 2865 3277 {0,2,5}\001
4 0 0 45 -1 0 9 0.0000 4 105 195 3060 3420 the\001
4 0 0 50 -1 0 9 0.0000 4 135 435 2370 3277 {1,3,5}\001
4 0 0 45 -1 0 9 0.0000 4 105 270 2539 3420 who\001
4 0 0 45 -1 0 9 0.0000 4 105 285 2045 3420 band\001
4 0 0 50 -1 0 9 0.0000 4 135 435 1895 3277 {1,2,4}\001
4 0 0 45 -1 0 9 0.0000 4 105 75 2179 1945 0\001
4 0 0 45 -1 0 9 0.0000 4 105 75 2674 1945 1\001
4 0 0 45 -1 0 9 0.0000 4 105 75 2179 3025 4\001
4 0 0 45 -1 0 9 0.0000 4 105 75 2674 3025 5\001
4 0 0 45 -1 0 9 0.0000 4 105 75 4300 2875 3\001
4 0 0 45 -1 0 9 0.0000 4 105 75 4305 2548 3\001
4 0 0 45 -1 0 9 0.0000 4 105 75 4305 2715 2\001
4 0 0 45 -1 0 9 0.0000 4 105 75 4299 2190 0\001
4 0 0 45 -1 0 9 0.0000 4 105 75 4299 2033 0\001

371
tex/bdz/introduction.tex Executable file
View File

@@ -0,0 +1,371 @@
\section{Introduction} \label{sec:introduction}
The BDZ algorithm was designed by Fabiano C. Botelho, Djamal Belazzougui, Rasmus Pagh and Nivio Ziviani.
It is a simple, efficient, near-optimal space and practical
algorithm to generate a family $\cal F$ of PHFs and MPHFs.
It is also referred to as the BPZ algorithm because of the work presented
by Botelho, Pagh and Ziviani in \cite{bpz07}.
In Botelho's PhD dissertation \cite{b08} it is also referred to as the RAM algorithm
because it is more suitable for key sets that can be handled in internal memory.
The BDZ algorithm uses $r$-uniform random hypergraphs
given by function values of $r$
uniform random hash functions on the input key set $S$ for generating PHFs and MPHFs that
require $O(n)$ bits to be stored.
A hypergraph is the generalization of a standard undirected
graph where each edge connects $r\geq 2$ vertices.
This idea is not new, see e.g. \cite{mwhc96},
but we have proceeded differently to achieve
a space usage of $O(n)$ bits rather than $O(n\log n)$ bits.
Evaluation time for all schemes considered is constant.
For $r=3$ we obtain a space usage of approximately $2.6n$ bits for
an MPHF. More compact, and even simpler, representations can be
achieved for larger $m$. For example, for $m=1.23n$ we can get a
space usage of $1.95n$ bits.
Our best MPHF space upper bound is within a
factor of 2 from the information theoretical lower bound of approximately
$1.4427n$ bits. We have shown that the BDZ algorithm is far more
practical than previous methods with proven space complexity, both
because of its simplicity, and because the constant factor of the
space complexity is more than 6 times lower than its closest
competitor, for plausible problem sizes. We verify the practicality
experimentally, using slightly more space than in the mentioned
theoretical bounds.
\section{The Algorithm}
The BDZ algorithm is a three-step algorithm that generates PHFs and MPHFs based on
random $r$-partite hypergraphs.
This is an approach that provides a much tighter analysis and is
much simpler than the one presented in
\cite{ckrt04}, where it was implicit how to construct
similar PHFs.
The fastest and most compact functions
are generated when $r=3$.
In this case a PHF can be stored in
approximately $1.95$ bits per key and
an MPHF in approximately
$2.62$ bits per key.
Figure~\ref{fig:overview} gives an overview of the algorithm for $r=3$,
taking as input a key set $S \subseteq U$ containing three English words, i.e., $S=\{\mathrm{who},\mathrm{band},\mathrm{the}\}$.
% which are nicely hashed to the name of a rock band ``the who band''.
The edge-oriented data structure proposed in~\cite{e87} is used
to represent hypergraphs, where each edge is explicitly represented
as an array of $r$ vertices and, for each vertex $v$,
there is a list of edges that are incident on $v$.
The {\em Mapping Step} in Figure~\ref{fig:overview}(a) carries out two
important tasks:
\begin{enumerate}
\item
It assumes that it is possible to find three uniform
hash functions, $h_0$, $h_1$ and $h_2$, with ranges $\{0,1\}$, $\{2,3\}$ and $\{4,5\}$, respectively.
These functions build a one-to-one mapping of the key set $S$ to the edge set $E$
of a random acyclic
$3$-partite hypergraph $G=(V,E)$, where $|V|=m=6$ and $|E|=n=3$.
In \cite{b08,bpz07} it is shown that
it is possible to obtain such a hypergraph with probability tending to $1$ as $n$
tends to infinity
whenever $m=cn$ and $c \ge 1.23$. The value of $c$ that minimizes the hypergraph size
(and thereby the amount of bits to represent the resulting functions) is $c \approx 1.23$.
To illustrate the mapping,
key ``who'' is mapped to edge $\{h_0(\text{``who''}),h_1(\text{``who''}),h_2(\text{``who''})\}=\{1,3,5\}$,
key ``band'' is mapped to edge $\{h_0(\text{``band''}),h_1(\text{``band''}),h_2(\text{``band''})\}=\{1,2,4\}$, and
key ``the'' is mapped to edge $\{h_0(\text{``the''}),h_1(\text{``the''}),h_2(\text{``the''})\}=\{0,2,5\}$.
\item
It tests whether the resulting random $3$-partite hypergraph contains cycles
by iteratively deleting edges that contain a vertex of degree 1.
The deleted edges are stored in the order of deletion in a list $\cal L$
to be used in the assigning step.
The first deleted edge in Figure~\ref{fig:overview}(a)
was $\{1,2,4\}$, the second one was $\{1,3,5\}$ and
the third one was $\{0,2,5\}$.
% the last one was $\{0,2,5\}$.
If it ends with an empty graph, then the test succeeds,
otherwise it fails.
\end{enumerate}
\begin{figure}
\begin{center}
\scalebox{0.9}{\epsfig{file=figs/overviewinternal3g.eps}}
\end{center}
\caption{(a) The mapping step generates a random acyclic $3$-partite hypergraph with $m=6$ vertices and $n=3$ edges
and a list $\cal L$ of edges obtained when we test whether the hypergraph is acyclic.
(b) The assigning step builds an array $g:[0,5] \to [0,3]$ to uniquely
assign an edge to a vertex. (c) The ranking step builds the data structure used to
compute function $\mathit{rank}: [0,5] \to [0,2]$ in $O(1)$ time.~~~~}
\label{fig:overview}
\end{figure}
We now show how to use the Jenkins hash functions \cite{j97}
to implement the three hash functions $h_i: S \to V_i$, $0\le i \le 2$, which are used to build a random $3$-partite hypergraph
$G=(V,E)$,
where $V= V_0 \cup V_1 \cup V_2$ and $|V_i| = \eta = \lceil \frac{m}{3} \rceil$.
Let $h':S \to \{0,1\}^\gamma$ be a Jenkins hash function
for $\gamma = 3 \times w$, where
$w = 32 \text{ or } 64$ for
32-bit and 64-bit architectures, respectively.
Let $H'$ be an array of 3 $w$-bit values.
The Jenkins hash function
allows us to compute in parallel the three entries in $H'$
and thereby the three hash functions $h_i$, as follows:
% Thus we can compute the three hash functions $h_i$
% as follows:
\begin{eqnarray}
H' &=& h'(x) \nonumber \\
h_0(x) &=& H'[0] \bmod \eta \nonumber \\
h_1(x) &=& H'[1] \bmod \eta + \eta \nonumber \\
h_2(x) &=& H'[2] \bmod \eta + 2\eta
\end{eqnarray}
The {\em Assigning Step} in Figure~\ref{fig:overview}(b) outputs
a PHF that maps the key set $S$ into the range $[0,m-1]$ and is represented by
an array $g$ storing values from the range $[0,3]$.
The array $g$ allows us to select one out of the $3$
vertices of a given edge, which is associated with a
key $k$.
A vertex for a key $k$ is given
by either $h_0(k)$, $h_1(k)$ or $h_2(k)$.
The function $h_i(k)$
to be used for $k$ is chosen by calculating $i = (g[h_0(k)] + g[h_1(k)] + g[h_2(k)]) \bmod 3$.
For instance,
the values 1 and 4 represent the keys ``who'' and ``band''
because $i = (g[1] + g[3] + g[5]) \bmod 3 = 0$ and $h_0(\text{``who''}) = 1$,
and $i = (g[1] + g[2] + g[4]) \bmod 3 = 2$ and $h_2(\text{``band''}) = 4$, respectively.
% Likewise, the value 4 represents the key
% because $(g[1] + g[2] + g[4]) \bmod 3 = 2$ and $h_2(\text{``band''}) = 4$, and so on.
The assigning step first initializes $g[i]=3$
to mark every vertex as unassigned
% (i.e., each vertex is unassigned)
and
$\mathit{Visited}[i]=\mathit{false}$, $0\leq i \leq m-1$.
Let $\mathit{Visited}$ be a boolean vector of size $m$
to indicate whether a vertex has been visited.
Then, for each edge $e \in \cal L$ from tail to head,
it looks for the first
vertex $u$ belonging to $e$ not yet visited.
This is a sufficient condition for success \cite{b08,bpz07,mwhc96}.
Let $j$, $0 \leq j \leq 2$, be the index of $u$ in $e$.
Then, it assigns $g[u]=(j-\sum_{v \in e \wedge \mathit{Visited}[v] = \mathit{true}} g[v]) \bmod 3$.
Whenever it passes through a vertex $u$ from $e$,
if $u$ has not yet been visited,
it sets $\mathit{Visited}[u] = \mathit{true}$.
% The value $g[i]=3$ is used to represent unassigned vertices.
If we stop the BDZ algorithm in the assigning step
we obtain a PHF with range $[0,m-1]$.
The PHF has the following form:
$phf(x) = h_{i(x)}(x)$, where $x\in S$ and $i(x) = (g[h_0(x)] + g[h_1(x)] + g[h_2(x)]) \bmod 3$.
In this case we do not need information for ranking and
can set $g[i] = 0$ whenever $g[i]$ is equal to 3, where $0 \le i \le m-1$.
Therefore, the range of the values stored in $g$ is narrowed
from $[0,3]$ to $[0,2]$. By using arithmetic coding on blocks of
values (see \cite{b08,bpz07} for details),
or any compression technique that allows
random access in constant time to an array of compressed values \cite{fn07,gn06,sg06},
we can store the resulting PHFs in $m\log 3 = c n\log 3$ bits,
where $c \ge 1.23$. For $c = 1.23$, the space requirement is $1.95n$ bits.
The {\em Ranking Step} in Figure~\ref{fig:overview}(c)
outputs a data structure
that permits narrowing the range of a PHF generated in the
assigning step from $[0,m-1]$ to $[0,n-1]$ and thereby
an MPHF is produced.
The data structure allows computing in constant time
a function $\mathit{rank}\!\!:[0,m-1]\to [0,n-1]$
that counts the number of assigned positions
before a given position $v$ in $g$.
For instance, $\mathit{rank}(4) = 2$ because
the positions $0$ and $1$ are assigned
since $g[0] \text{ and } g[1] \not = 3$.
% and they come before position 4 in $g$.
For the implementation of the ranking step
we have borrowed
a simple and efficient implementation from
\cite{dict-jour}.
It requires $\epsilon \, m$ additional bits of space, where $0 < \epsilon < 1$,
and is obtained by storing explicitly the
$\mathit{rank}$ of every $k$th index in a rankTable, where $k
=\lfloor\log(m)/\epsilon\rfloor$.
The larger $k$ is, the more compact the resulting MPHF is.
Therefore, the users can trade off space for evaluation time
by setting $k$ appropriately in the implementation.
% In the implementation we let
% $k$ to be set by the users so that they can trade off
% space for evaluation time and vice-versa.
We only allow values for $k$
that are powers of two (i.e., $k=2^{b_k}$ for some constant $b_k$) in order to replace the expensive
division and modulo operations by
bit-shift and bitwise ``and'' operations, respectively.
We have used $k=256$
in the experiments
for generating more succinct MPHFs.
We remark that it is still possible to obtain a more compact data structure by
using the results presented in \cite{os07,rrr02}, but at the cost of a much more
complex implementation.
We need to use an additional lookup table $T_r$
to guarantee the constant evaluation time of $\mathit{rank}(u)$.
Let us illustrate how $\mathit{rank}(u)$ is computed
using both the rankTable and the lookup table $T_r$.
We first look up
the rank of the largest precomputed index
$v\leq u$ in the rankTable,
and use $T_r$ to count the number of assigned vertices from position
$v$ to $u-1$.
The lookup table $T_r$ allows us to count in constant time
the number of assigned vertices in $\flat=\epsilon \log m$ bits,
where $0 < \epsilon < 1$. Thus the actual evaluation time is $O(1/\epsilon)$.
For simplicity and
without loss of generality we let $\flat$ be a multiple of the number of
bits $\beta$ used to encode each entry of $g$.
As the values in $g$ come from the range $[0,3]$,
then $\beta=2$ bits and we have tried $\flat = 8 \text{ and } 16$.
We would expect that $\flat = 16$ should provide
a faster evaluation time because we would need to carry out fewer lookups
in $T_r$. But, for both values of $\flat$ the lookup table $T_r$ fits entirely in
the CPU cache and we did not observe any significant difference in
the evaluation times. Therefore we settle for $\flat=8$.
We remark that each $r \ge 2$ requires
a different lookup table $T_r$ that can be generated a priori.
% To do this in $O(1/\epsilon)$ time
% we use a lookup table $T_r$ that allows us to count
% the number of assigned vertices in $\flat=\epsilon \log m$ bits
% in constant time for any $0 < \epsilon < 1$.
% In general the PHFs or MPHFs are constructed based on random acyclic $r$-partite hypergraphs $G_r=(V,E)$,
% where $V= V_0 \cup V_1 \cup \dots \cup V_{r-1}$ and $|V_i| = \eta = \lceil \frac{m}{r} \rceil$, where $0\leq i < r$.
% The most efficient and compact functions are generated
% when $r=3$ and $m=1.23n$. The value $1.23n$ is required to generate a
% random acyclic $3$-partite hypergraph with high probability\footnote{Throughout this paper
% we write ``with high probability'' to mean with probability
% $1 - n^{-\delta}$ for $\delta > 0$.}~\cite{b08,bpz07}.
% the family of linear transformations
% presented in \cite{admp99}. A still faster option is the Jenkins function
% proposed in \cite{j97}, which was used for all methods considered in this paper.
The resulting
MPHFs have the following form:
$h(x) = \mathit{rank}(\mathit{phf}(x))$.
Then, we cannot get rid of
the ranking information by replacing the values 3 by 0 in the entries of $g$.
% The array
% $g$ is now representing a function $g:V\to \{0,1,2,3\}$
% and $\mathit{rank}: V \to [0,n-1]$ is
% now the cardinality of
% $\{ u\in V \;\mid\; u<v \wedge g[u] \not = 3\}$.
% Notice that a vertex $u$ is assigned if $g[u] \neq 3$.
In this case each entry in the array $g$ is encoded
with $2$ bits and we need $\epsilon m$ additional bits to compute function
$\mathit{rank}$ in constant time. Then, the total space to store
the resulting functions is $(2 + \epsilon)m = (2 + \epsilon)cn$ bits.
By using $c = 1.23$ and $\epsilon = 0.125$
we have obtained MPHFs that require approximately $2.62$ bits per key to be stored.
% Figure~\ref{prog:ram} presents a pseudo code for
% the BDZ algorithm, showing how to implement the mapping,
% assigning, and ranking steps. Next, it shows how to evaluate the PHF and the MPHF.
% The MPHF algorithm uses a lookup table, which is also shown in the figure.
%
% \begin{figure}
% \begin{center}
% \vspace{-10mm}
% \begin{lstlisting}[multicols=2]
% @{\bf BDZ Algorithm}\\[1mm]@
% @{\bf Input:} key set $S$, a constant $c \ge 1.23$, a constant $b_k$
% and a family of ``good'' hash functions $\cal H$.\\[1mm]@
% @{\bf Output:} an array $g$ with $m = \lceil cn \rceil$ 2-bit entries, and a rankTable with $(m >\!> b_k + 1)$ $\delta$-bit entries, where $\delta = 32 \text{ or } 64$ depending on the architecture. The operator $>\!>$ denotes the right shift of bits.\\[2mm]@
% void @BDZ@ (@$S$@, @$\cal H$@, @$c$@, @$b_k$@, @$g$@, @rankTable@)@\\[2mm]@
% // Mapping step
% do
% @$G.E = \emptyset$@;
% select @$h'$@ at random from @$\cal H$@;
% for @{\bf each}@ @$x \in S$@ do
% @$H'$ = $h'(x)$@;
% @$e$@ = @$\{h_0(x), h_1(x), h_2(x)\}$@;
% addEdge (@$G$@, @$e$@);
% @$\cal L$@ = isAcyclic(@$G$@);
% while (@$G.E$@ is not empty);
%
% // Assigning step
% for (@$u = 0$@; @$u < m$@; @$u$++@)
% Visited[@$u$@] = @{\bf false}@;
% @$g[u]$@ = @$3$@;
% for (i = @$|{\cal L}|-1$@; i @$\ge 0$@; i@$--$@)
% @$e$@ = @$\cal L$@[i];
% sum = 0;
% for (@$v$@ = 2; @$v \ge 0$@; @$v$@@$--$@)
% if (not Visited[@$e[v]$@])
% Visited[@$e[v]$@] = @{\bf true}@;
% @$u$@ = @$e[v]$@;
% @$j$@ = @$v$@;
% else sum += @$g[e[v]]$@;
% @g[u]@ = @$(j - \mathrm{sum}) \bmod 3$@;
%
% // Ranking step
% sum = 0;
% kmask = @$(2^{b_k}-1)$@;
% for (i = 0; i < @$|g|$@; i++)
% if((i & kmask) @==@ 0)
% rankTable[i @$>\!> b_k$@] = sum;
% if(@$g$@[i] @$\not = 3$@) sum++;
%
% @{\bf PHF Algorithm}\\[1mm]@
% @{\bf Input:} a key $x \in S$, an array $g$ with $m = \lceil cn \rceil$ 2-bit entries, where $c \ge 1.23$, and the ``good'' hash functions $h'$ selected by the BDZ algorithm.\\[1mm]@
% @{\bf Output:} the perfect hash function value for the key $x \in S$.\\[2mm]@
% int phf (@$x$@, @$g$@, @$h'$@)
% @$H'$@ = @$h'(x)$@;
% @$e$@ = @$\{h_0(x), h_1(x), h_2(x)\}$@;
% @$v$@ = @$(g[e[0]] + g[e[1]] + g[e[2]]) \bmod 3$@;
% return @$e[v]$@;
%
% @{\bf Algorithm to Generate the Lookup Table}\\[1mm]@
% @{\bf Input:} none\\[1mm]@
% @{\bf Output:} the lookup table @$T_r$@ to be used by the mphf function. It counts the number of assigned
% vertices in a single byte. As each entry in the array $g$ is encoded by 2 bits, then a single byte can store at most four 2-bit values. LS($i'$,2) stands for the value of the 2 least significant bits of $i'$.\\[2mm]@
% void genLookupTable (@$T_r$@)
% for (i = 0; i < 256; i++)
% sum = 0;
% @$i'$@ = i;
% for (j = 0; j < 4; j++)
% if(@$\text{LS}(i',2) \not = 3$@) sum++;
% @$i'$@ = @$i' >\!> 2$@;
% @$T_r[i]$@ = sum;
%
% @{\bf MPHF Algorithm}\\[1mm]@
% @{\bf Input:} a key $x \in S$, an array $g$ with $m = \lceil cn \rceil$ 2-bit entries, where $c \ge 1.23$, the chosen ``good'' hash functions $h'$, a constant $b_k$ that makes $k=2^{b_k}$, the lookup table $T_r$ that counts the number of assigned vertices in a single byte, and a rankTable with $(m >\!> b_k + 1)$ $\delta$-bit entries, where $\delta = 32 \text{ or } 64$ depending on the architecture. The notation $g[i \to j]$ represents the values stored in the entries from $g[i]$ to $g[j]$ for $i\leq j$.\\[1mm]@
% @{\bf Output:} the minimal perfect hash function value for the key $x \in S$.\\[2mm]@
% int mphf (@$x$@, @$g$@, @$h'$@, @$b_k$@, @$T_r$@, @rankTable@)
% @$u$@ = phf(@$x$@, @$g$@, @$h'$@);
% j = @$u >\!> b_k$@; // @j@ = @$u$@/k
% rank = rankTable[j];
% i = j @$<\!< b_k$@; // @i@ = @j*k@
% for(j = i + 4; j < u; i = j, j += 4)
% rank += @$T_r[g[$@i @$\to$@ j@$]]$@;
% for(j = j - 4; j < u; j ++)
% if(@$g$@[j] @$\not =$@ 3) rank ++ ;
% return rank;
% \end{lstlisting}
% \end{center}
% \vspace{-6mm}
% \caption{The BDZ algorithm and the resulting PHFs and MPHFs.}
% \label{prog:ram}
% \vspace{-7mm}
% \end{figure}
$\eta$ ~~
$\epsilon$ ~~
$\varepsilon$

12
tex/bdz/makefile Executable file
View File

@@ -0,0 +1,12 @@
# Builds the BDZ documentation (bdz.tex) into PostScript and, optionally, HTML.
# None of the targets below names a file it produces, so declare them phony;
# otherwise a stray file called e.g. `clean` would silently disable the target.
.PHONY: all run html clean

# latex once to collect citations, bibtex to resolve them, then latex twice
# more so that citations and cross-references settle before dvips runs.
all:
	latex bdz.tex
	bibtex bdz
	latex bdz.tex
	latex bdz.tex
	dvips bdz.dvi -o bdz.ps

# Rebuild from scratch and preview the PostScript output with gv (backgrounded).
run: clean all
	gv bdz.ps &

# Rebuild from scratch and generate the HTML version with latex2html.
html: clean all
	latex2html bdz.tex

# Remove generated files. -f keeps rm from failing when some of them do not
# exist (e.g. on a fresh checkout), which would otherwise abort `run`/`html`
# because both depend on `clean`.
clean:
	rm -f bdz.dvi bdz.ps *.lot *.lof *.aux *.bbl *.blg *.log *.toc