\section{Experimental Results}

We now present some experimental results.
The same experiments were run with our algorithm and with
the algorithm due to Czech, Havas and Majewski~\cite{chm92}, referred to as
the CHM algorithm.
The two algorithms were implemented in the C language and
are available at \texttt{http://cmph.sf.net}.
Our data consists of a collection of 100 million
uniform resource locators (URLs) collected from the Web.
The average length of a URL in the collection is 63 bytes.
All experiments were carried out on
a computer running the Linux operating system, version 2.6.7,
with a 2.4 gigahertz processor and
4 gigabytes of main memory.

Table~\ref{tab:characteristics} presents the main characteristics
of the two algorithms.
The number of edges in the graph $G=(V,E)$ is~$|S|=n$,
the number of keys in the input set~$S$.
The number of vertices of $G$ is equal to $1.15n$ and $2.09n$
for our algorithm and the CHM algorithm, respectively.
This quantity determines the amount of space needed to store the
array $g$, so our algorithm stores the resulting function in
$55\%$ of the space required by the CHM algorithm.
The number of critical edges is $\frac{1}{2}|E(G)|$ for our algorithm
and 0 for the CHM algorithm:
our algorithm generates random graphs that contain cycles with high
probability, whereas the CHM algorithm generates acyclic random graphs.
Finally, the CHM algorithm generates order preserving functions,
while our algorithm does not preserve order.

\vspace{-10pt}
\begin{table}[htb]
{\footnotesize
\begin{center}
\begin{tabular}{|c|c|c|c|c|c|c|}
\hline
 & $c$ & $|E(G)|$ & $|V(G)|=|g|$ & $|E(G_\crit)|$ & $G$ & Order preserving \\
\hline
Our algorithm & 1.15 & $n$ & $cn$ & $0.5|E(G)|$ & cyclic & no \\
\hline
CHM algorithm & 2.09 & $n$ & $cn$ & 0 & acyclic & yes \\
\hline
\end{tabular}
\end{center}
}
\caption{Main characteristics of the algorithms}
\vspace{-25pt}
\label{tab:characteristics}
\end{table}
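To make the space accounting in Table~\ref{tab:characteristics} concrete,
the sketch below shows how a function of this family is evaluated at query
time. It is a minimal illustration rather than the interface of either
implementation: \texttt{h1} and \texttt{h2} stand for the two auxiliary hash
functions chosen in the mapping step, and all names are hypothetical.
The only table consulted at query time is $g$, with $|V(G)|=cn$ entries,
which is why the number of vertices dictates the storage requirement.

\begin{verbatim}
#include <stdint.h>

/* Hypothetical sketch: each key x corresponds to the edge
 * {h1(x), h2(x)} of G, and its hash value is derived from the
 * g-labels of the two endpoints.  The array g is the only data
 * kept at query time, hence the cn-word space bound.          */
typedef struct {
    uint32_t *g;   /* labels of the cn vertices of G */
    uint32_t  m;   /* number of vertices, m = cn     */
    uint32_t  n;   /* number of keys                 */
} mphf_t;

static uint32_t mphf_eval(const mphf_t *f,
                          uint32_t (*h1)(const char *, uint32_t),
                          uint32_t (*h2)(const char *, uint32_t),
                          const char *key)
{
    uint32_t u = h1(key, f->m);          /* first endpoint  */
    uint32_t v = h2(key, f->m);          /* second endpoint */
    return (f->g[u] + f->g[v]) % f->n;   /* value in [0,n)  */
}
\end{verbatim}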
Table~\ref{tab:timeresults} presents time measurements.
All times are in seconds.
The table entries are averages over 50 trials.
The column labelled $N_i$ gives
the number of iterations to generate the random graph $G$
in the mapping step of the algorithms.
The next columns give the running times
for the mapping plus ordering steps together and for the searching
step of each algorithm.
The last column gives the percentage gain of our algorithm
over the CHM algorithm.

\begin{table*}
{\footnotesize
\begin{center}
\begin{tabular}{|c|cccc|cccc|c|}
\hline
\raisebox{-0.7em}{$n$} & \multicolumn{4}{c|}{\raisebox{-1mm}{Our algorithm}} &
\multicolumn{4}{c|}{\raisebox{-1mm}{CHM algorithm}}& \raisebox{-0.2em}{Gain}\\
\cline{2-5} \cline{6-9}
 & \raisebox{-1mm}{$N_i$} &\raisebox{-1mm}{Map+Ord} &
\raisebox{-1mm}{Search} &\raisebox{-1mm}{Total} &
\raisebox{-1mm}{$N_i$} &\raisebox{-1mm}{Map+Ord} &\raisebox{-1mm}{Search} &
\raisebox{-1mm}{Total} & \raisebox{0.2em}{(\%)}\\
\hline
%1,562,500   & 2.28 & 8.54   & 2.37   & 10.91  & 2.70 & 14.56    & 1.57   & 16.13    & 48 \\
%3,125,000   & 2.16 & 15.92  & 4.88   & 20.80  & 2.85 & 30.36    & 3.20   & 33.56    & 61 \\
6,250,000   & 2.20 & 33.09  & 10.48  & 43.57  & 2.90 & 62.26    & 6.76   & 69.02    & 58 \\
12,500,000  & 2.00 & 63.26  & 23.04  & 86.30  & 2.60 & 117.99   & 14.94  & 132.92   & 54 \\
25,000,000  & 2.00 & 130.79 & 51.55  & 182.34 & 2.80 & 262.05   & 33.68  & 295.73   & 62 \\
%50,000,000  & 2.07 & 273.75 & 114.12 & 387.87 & 2.90 & 577.59   & 73.97  & 651.56   & 68 \\
100,000,000 & 2.07 & 567.47 & 243.13 & 810.60 & 2.80 & 1,131.06 & 157.23 & 1,288.29 & 59 \\
\hline
\end{tabular}
\end{center}
\caption{Time measurements for our algorithm and the CHM algorithm}
\vspace{-25pt}
\label{tab:timeresults}
}\end{table*}

\enlargethispage{\baselineskip}
The mapping step of the new algorithm is faster because
the expected number of iterations needed to generate
$G$ is 2.13 for our algorithm and 2.92 for the CHM algorithm.
Moreover, the graph $G$ generated by our algorithm
has $1.15n$ vertices, against $2.09n$ for the CHM algorithm.
These two facts make our algorithm faster in the mapping step.
The time spent in the ordering step of our algorithm is approximately
equal to the time the CHM algorithm spends checking whether $G$ is acyclic.
The searching step of the CHM algorithm is faster, but in total
our algorithm is, on average, approximately 58\% faster
than the CHM algorithm.

The experimental results fully back the theoretical results.
It is important to notice the times for the searching step:
for both algorithms they are not the dominant times,
and the experimental results clearly show
linear behavior for the searching step.

We now present a heuristic that reduces the space requirement
to any given value between $1.15n$ words and $0.93n$ words.
The heuristic reuses, when possible, the set
of $x$ values that caused reassignments, just before trying $x+1$
(see Section~\ref{sec:searching}).
The lower limit $c=0.93$ was obtained experimentally:
we generated $10{,}000$ random graphs for
each size $n$ ($n=10^5$, $5 \times 10^5$, $10^6$, $2\times 10^6$);
with $c=0.93$ we were always able to generate~$h$, but with $c=0.92$ we never
succeeded.
Decreasing the value of $c$ increases the number of
iterations needed to generate $G$.
For example, for $c=1$ and $c=0.93$, the analytical expected numbers
of iterations are $2.72$ and $3.17$, respectively
(for $n=12{,}500{,}000$, the measured numbers of iterations are 2.78 for $c=1$
and 3.04 for $c=0.93$).
Table~\ref{tab:timeresults2} presents the total times to construct a
function for $n=12{,}500{,}000$, with an increase from $86.30$ seconds
for $c=1.15$ (see Table~\ref{tab:timeresults}) to
$101.74$ seconds for $c=1$ and to $102.19$ seconds for $c=0.93$.
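Both the $N_i$ column of Tables~\ref{tab:timeresults}
and~\ref{tab:timeresults2} and the iteration counts just quoted come from
the same retry loop in the mapping step: fresh seeds for the two auxiliary
hash functions are drawn and $G$ is rebuilt until the graph is acceptable.
The toy program below illustrates that loop under simplifying assumptions
of ours: random numbers stand in for the hash functions, and only
self-loops are rejected, whereas the real implementations also reject
duplicate edges and, in the CHM case, any cyclic graph, which is why CHM
needs more iterations on average.

\begin{verbatim}
#include <stdio.h>
#include <stdlib.h>

/* Toy illustration of the mapping-step retry loop behind the
 * N_i column; not code from either implementation.           */
typedef struct { unsigned u, v; } edge_t;

static void build_edges(edge_t *e, unsigned n, unsigned m)
{
    for (unsigned i = 0; i < n; i++) {   /* edge {h1(x), h2(x)} */
        e[i].u = (unsigned)rand() % m;
        e[i].v = (unsigned)rand() % m;
    }
}

static int acceptable(const edge_t *e, unsigned n)
{
    for (unsigned i = 0; i < n; i++)
        if (e[i].u == e[i].v) return 0;  /* self-loop: retry */
    return 1;
}

int main(void)
{
    enum { N = 1000 };                    /* number of keys      */
    unsigned m = (unsigned)(1.15 * N);    /* |V(G)| = cn, c=1.15 */
    static edge_t e[N];
    unsigned ni = 0;

    do {                                  /* this count is N_i   */
        ni++;
        build_edges(e, N, m);
    } while (!acceptable(e, N));

    printf("G accepted after %u iteration(s)\n", ni);
    return 0;
}
\end{verbatim}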
\vspace{-5pt}
\begin{table*}
{\footnotesize
\begin{center}
\begin{tabular}{|c|cccc|cccc|}
\hline
\raisebox{-0.7em}{$n$} & \multicolumn{4}{c|}{\raisebox{-1mm}{Our algorithm $c=1.00$}} &
\multicolumn{4}{c|}{\raisebox{-1mm}{Our algorithm $c=0.93$}} \\
\cline{2-5} \cline{6-9}
 & \raisebox{-1mm}{$N_i$} &\raisebox{-1mm}{Map+Ord} &
\raisebox{-1mm}{Search} &\raisebox{-1mm}{Total} &
\raisebox{-1mm}{$N_i$} &\raisebox{-1mm}{Map+Ord} &\raisebox{-1mm}{Search} &
\raisebox{-1mm}{Total} \\
\hline
12,500,000 & 2.78 & 76.68 & 25.06 & 101.74 & 3.04 & 76.39 & 25.80 & 102.19 \\
\hline
\end{tabular}
\end{center}
\caption{Time measurements for our tuned algorithm with $c=1.00$ and $c=0.93$}
\vspace{-25pt}
\label{tab:timeresults2}
}
\end{table*}

We also compared our algorithm with the ones proposed by Pagh~\cite{p99} and
by Dietzfelbinger and Hagerup~\cite{dh01}. The authors sent us their
source code. In their implementations the set of keys is a set of random
integers, so we modified our implementation to generate~$h$ from a set of
random integers in order to make a fair comparison. For a set of $10^6$
random integers, the times to generate a minimal perfect hash function were
2.7, 4 and 4.5 seconds for our algorithm, Pagh's algorithm and
Dietzfelbinger and Hagerup's algorithm, respectively.
Thus, our algorithm was 48\% faster than Pagh's algorithm and 67\% faster
than Dietzfelbinger and Hagerup's algorithm, on average. This gain was
maintained for key sets of other sizes.
Our algorithm needs $kn$ words ($k \in [0.93, 1.15]$) to store
the resulting function, while Pagh's algorithm needs $kn$ words ($k > 2$) and
Dietzfelbinger and Hagerup's algorithm needs $kn$ words ($k \in [1.13, 1.15]$).
The time to generate the functions is inversely proportional to the value
of $k$.
\enlargethispage{\baselineskip}
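As a back-of-the-envelope check of these space figures, the fragment below
tabulates $kn$ words for the quoted values of $k$, assuming four-byte words
and $n=12{,}500{,}000$ keys (both assumptions ours; for Pagh's algorithm we
plug in the lower bound $k=2$).

\begin{verbatim}
#include <stdio.h>

/* Storage needed for the resulting function, at kn words of an
 * assumed 4 bytes each; k values are those quoted in the text. */
int main(void)
{
    const double n = 12500000.0;                  /* keys */
    const struct { const char *alg; double k; } rows[] = {
        { "Ours (heuristic, c = 0.93)", 0.93 },
        { "Ours (c = 1.15)",            1.15 },
        { "Dietzfelbinger-Hagerup",     1.13 },
        { "Pagh (lower bound, k = 2)",  2.00 },
    };
    for (int i = 0; i < 4; i++)
        printf("%-28s %6.1f MB\n", rows[i].alg,
               rows[i].k * n * 4.0 / 1e6);
    return 0;
}
\end{verbatim}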