Documentation updated for release 0.9

2009-06-12 21:49:26 -03:00
parent 37444720b5
commit 9ae0e10732
274 changed files with 2951 additions and 217 deletions
--- a/tex/chd/chd.bib
+++ b/tex/chd/chd.bib
@@ -0,0 +1,176 @@
+@inproceedings{bpz07,
+    author = {F.C. Botelho and R. Pagh and N. Ziviani},
+    title = {Simple and Space-Efficient Minimal Perfect Hash Functions},
+    booktitle = {Proceedings of the 10th Workshop on Algorithms and Data Structures (WADS'07)},
+    publisher = {Springer LNCS vol. 4619},
+    pages = {139-150},
+    month = aug,
+    location = {Halifax, Canada},
+    year = 2007,
+    key = {author}
+}
+
+@inproceedings{pb06,
+ author = {B. Prabhakar and F. Bonomi},
+ title = {Perfect Hashing for Network Applications},
+ booktitle = {Proceedings of the IEEE International Symposium
+on Information Theory},
+ year = {2006},
+ location = {Seattle, Washington, USA},
+ publisher = {IEEE Press}
+ }
+
+@inproceedings{dp08,
+ author = {Martin Dietzfelbinger and Rasmus Pagh},
+ title = {Succinct Data Structures for Retrieval and Approximate Membership},
+ booktitle = {Proceedings of the 35th international colloquium on Automata, Languages and Programming (ICALP'08)},
+ year = {2008},
+ isbn = {978-3-540-70574-1},
+ pages = {385--396},
+ location = {Reykjavik, Iceland},
+ doi = {10.1007/978-3-540-70575-8_32},
+ publisher = {Springer-Verlag},
+ address = {Berlin, Heidelberg},
+ }
+
+
+@inproceedings{bbd09,
+    author = {D. Belazzougui and F.C. Botelho and M. Dietzfelbinger},
+    title = {Compress, Hash and Displace},
+    booktitle = {Proceedings of the 17th European Symposium on Algorithms (ESA'09)},
+    publisher = {Springer LNCS},
+    OPTpages = {139-150},
+    month = sep,
+    location = {Copenhagen, Denmark},
+    year = 2009,
+    key = {author}
+}
+
+@PhdThesis{b08,
+author = {F. C. Botelho},
+title = {Near-Optimal Space Perfect Hashing Algorithms},
+school = {Federal University of Minas Gerais},
+year = {2008},
+OPTkey = {},
+OPTtype = {},
+OPTaddress = {},
+month = {September},
+note = {Supervised by Nivio Ziviani, \url{http://www.dcc.ufmg.br/pos/cursos/defesas/255D.PDF}},
+OPTannote = {},
+OPTurl = {http://www.dcc.ufmg.br/pos/cursos/defesas/255D.PDF},
+OPTdoi = {},
+OPTissn = {},
+OPTlocalfile = {},
+OPTabstract = {}
+}
+
+@Article{mwhc96,
+  author =   {B.S. Majewski and N.C. Wormald and G. Havas and Z.J. Czech},
+  title =    {A family of perfect hashing methods},
+  journal =      {The Computer Journal},
+  year =     {1996},
+  volume =   {39},
+  number =   {6},
+  pages =    {547-554},
+  key = {author}
+}
+
+@inproceedings{ckrt04,
+ author = {B. Chazelle and J. Kilian and R. Rubinfeld and A. Tal},
+ title = {The Bloomier Filter: An Efficient Data Structure for Static Support Lookup Tables},
+ booktitle = {Proceedings of the 15th annual ACM-SIAM symposium on Discrete algorithms (SODA'04)},
+ year = {2004},
+ isbn = {0-89871-558-X},
+ pages = {30--39},
+ location = {New Orleans, Louisiana},
+ publisher = {Society for Industrial and Applied Mathematics},
+ address = {Philadelphia, PA, USA},
+ optpublisher = {Society for Industrial and Applied Mathematics}
+ }
+
+@Article{j97,
+  author =       {B. Jenkins},
+  title =        {Algorithm Alley: Hash Functions},
+  journal =      {Dr. Dobb's Journal of Software Tools},
+  volume =       {22},
+  number =       {9},
+  month =        sep,
+  year =         {1997},
+  note = {Extended version available at \url{http://burtleburtle.net/bob/hash/doobs.html}}
+}
+
+
+@Article{e87,
+  author =   {J. Ebert},
+  title =    {A Versatile Data Structure for Edge-Oriented Graph Algorithms},
+  journal =      {Communications of the ACM},
+  year =     {1987},
+  OPTkey =   {},
+  volume =   {30},
+  number =   {6},
+  pages =    {513-519},
+  OPTmonth =     {},
+  OPTnote =      {},
+  OPTannote =    {}
+}
+
+@article {dict-jour,
+   AUTHOR = {R. Pagh},
+    TITLE = {Low Redundancy in Static Dictionaries with Constant Query Time},
+  OPTJOURNAL = {SIAM J. Comput.},
+  JOURNAL = {SIAM Journal on Computing},
+   VOLUME = {31},
+     YEAR = {2001},
+   NUMBER = {2},
+    PAGES = {353--363},
+}
+
+
+@inproceedings{sg06,
+  author    = {K. Sadakane and R. Grossi},
+  title     = {Squeezing succinct data structures into entropy bounds},
+  booktitle = {Proceedings of the 17th annual ACM-SIAM symposium on Discrete algorithms (SODA'06)},
+  year      = {2006},
+  pages     = {1230--1239}
+}
+
+@inproceedings{gn06,
+  author    = {R. Gonzalez and
+               G. Navarro},
+  title     = {Statistical Encoding of Succinct Data Structures},
+  booktitle = {Proceedings of the 17th Annual Symposium on Combinatorial Pattern Matching (CPM'06)},
+  year      = {2006},
+  pages     = {294--305}
+}
+
+@inproceedings{fn07,
+  author    = {K. Fredriksson and
+               F. Nikitin},
+  title     = {Simple Compression Code Supporting Random Access and Fast
+               String Matching},
+  booktitle = {Proceedings of the 6th International Workshop on Efficient and Experimental Algorithms (WEA'07)},
+  year      = {2007},
+  pages     = {203--216}
+}
+
+@inproceedings{os07,
+  author    = {D. Okanohara and K. Sadakane},
+  title     = {Practical Entropy-Compressed Rank/Select Dictionary},
+  booktitle = {Proceedings of the Workshop on Algorithm Engineering and
+               Experiments (ALENEX'07)},
+  year      = {2007},
+  location = {New Orleans, Louisiana, USA}
+ }
+
+
+@inproceedings{rrr02,
+ author = {R. Raman and V. Raman and S. S. Rao},
+ title = {Succinct indexable dictionaries with applications to encoding k-ary trees and multisets},
+ booktitle = {Proceedings of the thirteenth annual ACM-SIAM symposium on Discrete algorithms (SODA'02)},
+ year = {2002},
+ isbn = {0-89871-513-X},
+ pages = {233--242},
+ location = {San Francisco, California},
+ publisher = {Society for Industrial and Applied Mathematics},
+ address = {Philadelphia, PA, USA},
+ }
--- a/tex/chd/chd.tex
+++ b/tex/chd/chd.tex
@@ -0,0 +1,70 @@
+% Driver file for the CHD algorithm documentation: sets up the preamble and
+% the listings style, then pulls in the introduction and the bibliography.
+\documentclass[12pt]{article}
+% graphicx supersedes the older graphics and epsfig packages, so only it is loaded.
+\usepackage{graphicx}
+\usepackage{latexsym}
+\usepackage{url}
+
+\usepackage{a4wide}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{amsfonts}
+\usepackage{listings}
+\usepackage{fancyhdr}
+\usepackage{multicol}
+\usepackage{textcomp}
+
+% \usepackage{subfigure}
+% \usepackage{subfig}
+% \usepackage{wrapfig}
+
+\bibliographystyle{plain}
+% \bibliographystyle{sbc}
+% \bibliographystyle{abnt-alf}
+% \bibliographystyle{abnt-num}
+
+\begin{document}
+
+% Document-wide relaxed line breaking: avoids overfull boxes at the cost of looser spacing.
+\sloppy
+
+% \renewcommand{\baselinestretch}{1.24}\normalsize % set the space between lines to 1.24
+
+% set headings
+% \pagestyle{fancy}
+% \lhead[\fancyplain{}{\footnotesize\thepage}]
+%       {\fancyplain{}{\footnotesize\rightmark}}
+% \rhead[\fancyplain{}{\footnotesize\leftmark}]
+%       {\fancyplain{}{\footnotesize\thepage}}
+% 
+% \cfoot{}
+
+% Default style for code listings: small C code in a borderless frame.
+\lstset{
+  language=C,
+  basicstyle=\fontsize{8}{8}\selectfont,
+  captionpos=t,
+  aboveskip=0mm,
+  belowskip=0mm,
+  abovecaptionskip=0.5mm,
+  belowcaptionskip=0.5mm,
+%  numbers = left,
+  mathescape=true,
+  escapechar=@,
+  extendedchars=true,
+  showstringspaces=false,
+%   columns=fixed,
+  basewidth=0.515em,
+  frame=single,
+  framesep=1mm,
+  xleftmargin=1mm,
+  xrightmargin=1mm,
+  framerule=0pt
+}
+
+% \input (rather than \include) avoids a forced page break before the bibliography.
+\input{introduction} % Introduction
+\bibliography{chd}
+
+\end{document}
--- a/tex/chd/introduction.tex
+++ b/tex/chd/introduction.tex
@@ -0,0 +1,38 @@
+\section{Introduction} \label{sec:introduction}
+
+
+The important performance parameters of a perfect hash function (PHF) are representation size, evaluation time and construction time. The representation size plays an important role when the whole function fits in a faster memory and the actual data is stored in a slower memory. For instance, compact PHFs can fit entirely in a CPU cache, which makes their computation really fast by avoiding cache misses. The CHD algorithm plays an important role in this context. It was designed by Djamal Belazzougui, Fabiano C. Botelho, and Martin Dietzfelbinger~\cite{bbd09}.
+
+
+The CHD algorithm makes it possible to obtain PHFs with representation size very close to optimal while retaining $O(n)$ construction time and $O(1)$ evaluation time. For example, in the case $m=2n$ we obtain a PHF that uses space $0.67$ bits per key, and for $m=1.23n$ we obtain space $1.4$ bits per key, which was not achievable with previously known methods. The CHD algorithm is inspired by several known algorithms;
+the main new feature is that it combines a modification of Pagh's ``hash-and-displace'' approach
+with data compression on a sequence of hash function indices. 
+That combination makes it possible to significantly reduce space usage 
+while retaining linear construction time and constant query time. 
+The CHD algorithm can also be used for $k$-perfect hashing,
+where at most $k$ keys may be mapped to the same value.
+For the analysis we assume that fully random hash functions are given for free;
+such assumptions can be justified and were made in previous papers.
+
+The compact PHFs generated by the CHD algorithm can be used in many applications in which we want to assign a unique identifier to each key without storing any information on the key. One of the most obvious applications of those functions 
+(or $k$-perfect hash functions) is when we have a small fast memory in which we can store the perfect hash function while the keys and associated satellite data are stored in slower but larger memory. 
+The size of a block or a transfer unit may be chosen so that $k$ data items can be retrieved in
+one read access. In this case we can ensure that data associated with a key can be retrieved in a single probe to slower memory. This has been used for example in hardware routers~\cite{pb06}. 
+% Perfect hashing has also been found to be competitive with traditional hashing in internal memory~\cite{blmz08} on standard computers. Recently perfect hashing has been used to accelerate algorithms on graphs~\cite{ESS08} when the graph representation does not fit in main memory.
+
+
+The CHD algorithm generates the most compact PHFs and minimal perfect hash functions (MPHFs) we know of in~$O(n)$ time.
+The time required to evaluate the generated functions is constant (in practice less than $1.4$ microseconds). 
+The storage space of the resulting PHFs and MPHFs is within a factor of $1.43$
+of the information-theoretic lower bound.
+The closest competitor is the algorithm by Dietzfelbinger and Pagh~\cite{dp08}, but
+their algorithm does not run in linear time.
+Furthermore, the CHD algorithm 
+can be tuned to run faster than the BPZ algorithm \cite{bpz07} (the fastest algorithm
+available in the literature so far) and to obtain more compact functions.
+The most impressive characteristic is that it has the ability, in principle, to
+approximate the information theoretic lower bound while being practical.
+A detailed description of the CHD algorithm can be found in \cite{bbd09}. 
+
+
+
--- a/tex/chd/makefile
+++ b/tex/chd/makefile
@@ -0,0 +1,12 @@
+# Builds the CHD documentation: DVI via latex/bibtex, then PostScript via dvips.
+# None of the targets below names a real file, so declare them all phony.
+.PHONY: all run html clean
+
+# latex writes the citations to chd.aux, bibtex turns them into chd.bbl, and
+# two further latex runs pull in the bibliography and resolve the references.
+all:
+	latex chd.tex
+	bibtex chd
+	latex chd.tex
+	latex chd.tex
+	dvips chd.dvi -o chd.ps
+
+# Rebuild from scratch and open the result in the gv viewer (in the background).
+run: clean all
+	gv chd.ps &
+
+# Rebuild from scratch, then generate an HTML version with latex2html.
+html: clean all
+	latex2html chd.tex
+
+# Remove generated files; -f keeps this from failing when some (or all) of
+# them do not exist, e.g. on a fresh checkout where "run"/"html" call it first.
+clean:
+	rm -f chd.dvi chd.ps *.lot *.lof *.aux *.bbl *.blg *.log *.toc