Use a bucket sort for PackReverseIndex.

Previously it took 1200ms to create a reverse index (sorted by offset).
Using a simple bucket sort algorithm, that time is reduced to 450ms.
The bucket index into the offset array is kept, in order to decrease
the binary search window.

Don't keep a copy of the offsets. Instead, use the nth position
to look up the offset in the PackIndex.

Change-Id: If51ab76752622e04a4430d9a14db95ad02f5329d
This commit is contained in:
Colby Ranger 2013-06-28 10:37:56 -07:00
parent 903fb9c739
commit 6cc532a43c
1 changed files with 87 additions and 86 deletions

View File

@ -44,7 +44,6 @@
package org.eclipse.jgit.internal.storage.file; package org.eclipse.jgit.internal.storage.file;
import java.text.MessageFormat; import java.text.MessageFormat;
import java.util.Arrays;
import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.JGitText;
@ -65,21 +64,22 @@ public class PackReverseIndex {
/** Index we were created from, and that has our ObjectId data. */ /** Index we were created from, and that has our ObjectId data. */
private final PackIndex index; private final PackIndex index;
/** /** The number of bytes per entry in the offsetIndex. */
* (offset31, truly) Offsets accommodating in 31 bits. private final long bucketSize;
*/
private final int offsets32[];
/** /**
* Offsets not accommodating in 31 bits. * An index into the nth mapping, where the value is the position after the
* last index that contains the values of the bucket. For example, given
* offset o (and bucket = o / bucketSize), the offset will be contained in
* the range nth[offsetIndex[bucket - 1]] inclusive to
* nth[offsetIndex[bucket]] exclusive.
*
* See {@link #binarySearch}
*/ */
private final long offsets64[]; private final int[] offsetIndex;
/** Position of the corresponding {@link #offsets32} in {@link #index}. */ /** Mapping from indices in offset order to indices in SHA-1 order. */
private final int nth32[]; private final int[] nth;
/** Position of the corresponding {@link #offsets64} in {@link #index}. */
private final int nth64[];
/** /**
* Create reverse index from straight/forward pack index, by indexing all * Create reverse index from straight/forward pack index, by indexing all
@ -92,38 +92,58 @@ public PackReverseIndex(final PackIndex packIndex) {
index = packIndex; index = packIndex;
final long cnt = index.getObjectCount(); final long cnt = index.getObjectCount();
final long n64 = index.getOffset64Count(); if (cnt + 1 > Integer.MAX_VALUE)
final long n32 = cnt - n64;
if (n32 > Integer.MAX_VALUE || n64 > Integer.MAX_VALUE
|| cnt > 0xffffffffL)
throw new IllegalArgumentException( throw new IllegalArgumentException(
JGitText.get().hugeIndexesAreNotSupportedByJgitYet); JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
offsets32 = new int[(int) n32]; if (cnt == 0) {
offsets64 = new long[(int) n64]; bucketSize = Long.MAX_VALUE;
nth32 = new int[offsets32.length]; offsetIndex = new int[1];
nth64 = new int[offsets64.length]; nth = new int[0];
return;
int i32 = 0;
int i64 = 0;
for (final MutableEntry me : index) {
final long o = me.getOffset();
if (o <= Integer.MAX_VALUE)
offsets32[i32++] = (int) o;
else
offsets64[i64++] = o;
} }
Arrays.sort(offsets32); final long[] offsetsBySha1 = new long[(int) cnt];
Arrays.sort(offsets64);
int nth = 0; long maxOffset = 0;
int ith = 0;
for (final MutableEntry me : index) { for (final MutableEntry me : index) {
final long o = me.getOffset(); final long o = me.getOffset();
if (o <= Integer.MAX_VALUE) offsetsBySha1[ith++] = o;
nth32[Arrays.binarySearch(offsets32, (int) o)] = nth++; if (o > maxOffset)
else maxOffset = o;
nth64[Arrays.binarySearch(offsets64, o)] = nth++; }
bucketSize = maxOffset / cnt + 1;
int[] bucketIndex = new int[(int) cnt];
int[] bucketValues = new int[(int) cnt + 1];
for (int oi = 0; oi < offsetsBySha1.length; oi++) {
final long o = offsetsBySha1[oi];
final int bucket = (int) (o / bucketSize);
final int bucketValuesPos = oi + 1;
final int current = bucketIndex[bucket];
bucketIndex[bucket] = bucketValuesPos;
bucketValues[bucketValuesPos] = current;
}
int nthByOffset = 0;
nth = new int[offsetsBySha1.length];
offsetIndex = bucketIndex; // Reuse the allocation
for (int bi = 0; bi < bucketIndex.length; bi++) {
final int start = nthByOffset;
// Insertion sort of the values in the bucket.
for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
final int nthBySha1 = vi - 1;
final long o = offsetsBySha1[nthBySha1];
int insertion = nthByOffset++;
for (; start < insertion; insertion--) {
if (o > offsetsBySha1[nth[insertion - 1]])
break;
nth[insertion] = nth[insertion - 1];
}
nth[insertion] = nthBySha1;
}
offsetIndex[bi] = nthByOffset;
} }
} }
@ -136,17 +156,10 @@ public PackReverseIndex(final PackIndex packIndex) {
* @return object id for this offset, or null if no object was found. * @return object id for this offset, or null if no object was found.
*/ */
public ObjectId findObject(final long offset) { public ObjectId findObject(final long offset) {
if (offset <= Integer.MAX_VALUE) { final int ith = binarySearch(offset);
final int i32 = Arrays.binarySearch(offsets32, (int) offset); if (ith < 0)
if (i32 < 0) return null;
return null; return index.getObjectId(nth[ith]);
return index.getObjectId(nth32[i32]);
} else {
final int i64 = Arrays.binarySearch(offsets64, offset);
if (i64 < 0)
return null;
return index.getObjectId(nth64[i64]);
}
} }
/** /**
@ -166,52 +179,40 @@ public ObjectId findObject(final long offset) {
*/ */
public long findNextOffset(final long offset, final long maxOffset) public long findNextOffset(final long offset, final long maxOffset)
throws CorruptObjectException { throws CorruptObjectException {
if (offset <= Integer.MAX_VALUE) { final int ith = binarySearch(offset);
final int i32 = Arrays.binarySearch(offsets32, (int) offset); if (ith < 0)
if (i32 < 0) throw new CorruptObjectException(
throw new CorruptObjectException( MessageFormat.format(
MessageFormat.format( JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset, Long.valueOf(offset)));
Long.valueOf(offset)));
if (i32 + 1 == offsets32.length) { if (ith + 1 == nth.length)
if (offsets64.length > 0) return maxOffset;
return offsets64[0]; return index.getOffset(nth[ith + 1]);
return maxOffset;
}
return offsets32[i32 + 1];
} else {
final int i64 = Arrays.binarySearch(offsets64, offset);
if (i64 < 0)
throw new CorruptObjectException(
MessageFormat.format(
JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
Long.valueOf(offset)));
if (i64 + 1 == offsets64.length)
return maxOffset;
return offsets64[i64 + 1];
}
} }
int findPostion(long offset) { int findPostion(long offset) {
if (offset <= Integer.MAX_VALUE) { return binarySearch(offset);
final int i32 = Arrays.binarySearch(offsets32, (int) offset); }
if (i32 < 0)
return -1; private int binarySearch(final long offset) {
return i32; int bucket = (int) (offset / bucketSize);
} else { int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
final int i64 = Arrays.binarySearch(offsets64, offset); int high = offsetIndex[bucket];
if (i64 < 0) while (low < high) {
return -1; final int mid = (low + high) >>> 1;
return nth32.length + i64; final long o = index.getOffset(nth[mid]);
if (offset < o)
high = mid;
else if (offset == o)
return mid;
else
low = mid + 1;
} }
return -1;
} }
ObjectId findObjectByPosition(int nthPosition) { ObjectId findObjectByPosition(int nthPosition) {
if (nthPosition < nth32.length) return index.getObjectId(nth[nthPosition]);
return index.getObjectId(nth32[nthPosition]);
final int i64 = nthPosition - nth32.length;
return index.getObjectId(nth64[i64]);
} }
} }