Use a bucket sort for PackReverseIndex.
Previously it took 1200ms to create a reverse index (sorted by offset). Using a simple bucket sort algorithm, that time is reduced to 450ms. The bucket index into the offset array is kept, in order to decrease the binary search window. Don't keep a copy of the offsets. Instead, use the nth position to look up the offset in the PackIndex. Change-Id: If51ab76752622e04a4430d9a14db95ad02f5329d
This commit is contained in:
parent
903fb9c739
commit
6cc532a43c
|
@ -44,7 +44,6 @@
|
||||||
package org.eclipse.jgit.internal.storage.file;
|
package org.eclipse.jgit.internal.storage.file;
|
||||||
|
|
||||||
import java.text.MessageFormat;
|
import java.text.MessageFormat;
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.eclipse.jgit.errors.CorruptObjectException;
|
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||||
import org.eclipse.jgit.internal.JGitText;
|
import org.eclipse.jgit.internal.JGitText;
|
||||||
|
@ -65,21 +64,22 @@ public class PackReverseIndex {
|
||||||
/** Index we were created from, and that has our ObjectId data. */
|
/** Index we were created from, and that has our ObjectId data. */
|
||||||
private final PackIndex index;
|
private final PackIndex index;
|
||||||
|
|
||||||
/**
|
/** The number of bytes per entry in the offsetIndex. */
|
||||||
* (offset31, truly) Offsets accommodating in 31 bits.
|
private final long bucketSize;
|
||||||
*/
|
|
||||||
private final int offsets32[];
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Offsets not accommodating in 31 bits.
|
* An index into the nth mapping, where the value is the position after the
|
||||||
|
* last index that contains the values of the bucket. For example given
|
||||||
|
* offset o (and bucket = o / bucketSize), the offset will be contained in
|
||||||
|
* the range nth[offsetIndex[bucket - 1]] inclusive to
|
||||||
|
* nth[offsetIndex[bucket]] exclusive.
|
||||||
|
*
|
||||||
|
* See {@link #binarySearch}
|
||||||
*/
|
*/
|
||||||
private final long offsets64[];
|
private final int[] offsetIndex;
|
||||||
|
|
||||||
/** Position of the corresponding {@link #offsets32} in {@link #index}. */
|
/** Mapping from indices in offset order to indices in SHA-1 order. */
|
||||||
private final int nth32[];
|
private final int[] nth;
|
||||||
|
|
||||||
/** Position of the corresponding {@link #offsets64} in {@link #index}. */
|
|
||||||
private final int nth64[];
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create reverse index from straight/forward pack index, by indexing all
|
* Create reverse index from straight/forward pack index, by indexing all
|
||||||
|
@ -92,38 +92,58 @@ public PackReverseIndex(final PackIndex packIndex) {
|
||||||
index = packIndex;
|
index = packIndex;
|
||||||
|
|
||||||
final long cnt = index.getObjectCount();
|
final long cnt = index.getObjectCount();
|
||||||
final long n64 = index.getOffset64Count();
|
if (cnt + 1 > Integer.MAX_VALUE)
|
||||||
final long n32 = cnt - n64;
|
|
||||||
if (n32 > Integer.MAX_VALUE || n64 > Integer.MAX_VALUE
|
|
||||||
|| cnt > 0xffffffffL)
|
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
|
JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
|
||||||
|
|
||||||
offsets32 = new int[(int) n32];
|
if (cnt == 0) {
|
||||||
offsets64 = new long[(int) n64];
|
bucketSize = Long.MAX_VALUE;
|
||||||
nth32 = new int[offsets32.length];
|
offsetIndex = new int[1];
|
||||||
nth64 = new int[offsets64.length];
|
nth = new int[0];
|
||||||
|
return;
|
||||||
int i32 = 0;
|
|
||||||
int i64 = 0;
|
|
||||||
for (final MutableEntry me : index) {
|
|
||||||
final long o = me.getOffset();
|
|
||||||
if (o <= Integer.MAX_VALUE)
|
|
||||||
offsets32[i32++] = (int) o;
|
|
||||||
else
|
|
||||||
offsets64[i64++] = o;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Arrays.sort(offsets32);
|
final long[] offsetsBySha1 = new long[(int) cnt];
|
||||||
Arrays.sort(offsets64);
|
|
||||||
|
|
||||||
int nth = 0;
|
long maxOffset = 0;
|
||||||
|
int ith = 0;
|
||||||
for (final MutableEntry me : index) {
|
for (final MutableEntry me : index) {
|
||||||
final long o = me.getOffset();
|
final long o = me.getOffset();
|
||||||
if (o <= Integer.MAX_VALUE)
|
offsetsBySha1[ith++] = o;
|
||||||
nth32[Arrays.binarySearch(offsets32, (int) o)] = nth++;
|
if (o > maxOffset)
|
||||||
else
|
maxOffset = o;
|
||||||
nth64[Arrays.binarySearch(offsets64, o)] = nth++;
|
}
|
||||||
|
|
||||||
|
bucketSize = maxOffset / cnt + 1;
|
||||||
|
int[] bucketIndex = new int[(int) cnt];
|
||||||
|
int[] bucketValues = new int[(int) cnt + 1];
|
||||||
|
for (int oi = 0; oi < offsetsBySha1.length; oi++) {
|
||||||
|
final long o = offsetsBySha1[oi];
|
||||||
|
final int bucket = (int) (o / bucketSize);
|
||||||
|
final int bucketValuesPos = oi + 1;
|
||||||
|
final int current = bucketIndex[bucket];
|
||||||
|
bucketIndex[bucket] = bucketValuesPos;
|
||||||
|
bucketValues[bucketValuesPos] = current;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nthByOffset = 0;
|
||||||
|
nth = new int[offsetsBySha1.length];
|
||||||
|
offsetIndex = bucketIndex; // Reuse the allocation
|
||||||
|
for (int bi = 0; bi < bucketIndex.length; bi++) {
|
||||||
|
final int start = nthByOffset;
|
||||||
|
// Insertion sort of the values in the bucket.
|
||||||
|
for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
|
||||||
|
final int nthBySha1 = vi - 1;
|
||||||
|
final long o = offsetsBySha1[nthBySha1];
|
||||||
|
int insertion = nthByOffset++;
|
||||||
|
for (; start < insertion; insertion--) {
|
||||||
|
if (o > offsetsBySha1[nth[insertion - 1]])
|
||||||
|
break;
|
||||||
|
nth[insertion] = nth[insertion - 1];
|
||||||
|
}
|
||||||
|
nth[insertion] = nthBySha1;
|
||||||
|
}
|
||||||
|
offsetIndex[bi] = nthByOffset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,17 +156,10 @@ public PackReverseIndex(final PackIndex packIndex) {
|
||||||
* @return object id for this offset, or null if no object was found.
|
* @return object id for this offset, or null if no object was found.
|
||||||
*/
|
*/
|
||||||
public ObjectId findObject(final long offset) {
|
public ObjectId findObject(final long offset) {
|
||||||
if (offset <= Integer.MAX_VALUE) {
|
final int ith = binarySearch(offset);
|
||||||
final int i32 = Arrays.binarySearch(offsets32, (int) offset);
|
if (ith < 0)
|
||||||
if (i32 < 0)
|
return null;
|
||||||
return null;
|
return index.getObjectId(nth[ith]);
|
||||||
return index.getObjectId(nth32[i32]);
|
|
||||||
} else {
|
|
||||||
final int i64 = Arrays.binarySearch(offsets64, offset);
|
|
||||||
if (i64 < 0)
|
|
||||||
return null;
|
|
||||||
return index.getObjectId(nth64[i64]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -166,52 +179,40 @@ public ObjectId findObject(final long offset) {
|
||||||
*/
|
*/
|
||||||
public long findNextOffset(final long offset, final long maxOffset)
|
public long findNextOffset(final long offset, final long maxOffset)
|
||||||
throws CorruptObjectException {
|
throws CorruptObjectException {
|
||||||
if (offset <= Integer.MAX_VALUE) {
|
final int ith = binarySearch(offset);
|
||||||
final int i32 = Arrays.binarySearch(offsets32, (int) offset);
|
if (ith < 0)
|
||||||
if (i32 < 0)
|
throw new CorruptObjectException(
|
||||||
throw new CorruptObjectException(
|
MessageFormat.format(
|
||||||
MessageFormat.format(
|
JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
|
||||||
JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
|
Long.valueOf(offset)));
|
||||||
Long.valueOf(offset)));
|
|
||||||
|
|
||||||
if (i32 + 1 == offsets32.length) {
|
if (ith + 1 == nth.length)
|
||||||
if (offsets64.length > 0)
|
return maxOffset;
|
||||||
return offsets64[0];
|
return index.getOffset(nth[ith + 1]);
|
||||||
return maxOffset;
|
|
||||||
}
|
|
||||||
return offsets32[i32 + 1];
|
|
||||||
} else {
|
|
||||||
final int i64 = Arrays.binarySearch(offsets64, offset);
|
|
||||||
if (i64 < 0)
|
|
||||||
throw new CorruptObjectException(
|
|
||||||
MessageFormat.format(
|
|
||||||
JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
|
|
||||||
Long.valueOf(offset)));
|
|
||||||
|
|
||||||
if (i64 + 1 == offsets64.length)
|
|
||||||
return maxOffset;
|
|
||||||
return offsets64[i64 + 1];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int findPostion(long offset) {
|
int findPostion(long offset) {
|
||||||
if (offset <= Integer.MAX_VALUE) {
|
return binarySearch(offset);
|
||||||
final int i32 = Arrays.binarySearch(offsets32, (int) offset);
|
}
|
||||||
if (i32 < 0)
|
|
||||||
return -1;
|
private int binarySearch(final long offset) {
|
||||||
return i32;
|
int bucket = (int) (offset / bucketSize);
|
||||||
} else {
|
int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
|
||||||
final int i64 = Arrays.binarySearch(offsets64, offset);
|
int high = offsetIndex[bucket];
|
||||||
if (i64 < 0)
|
while (low < high) {
|
||||||
return -1;
|
final int mid = (low + high) >>> 1;
|
||||||
return nth32.length + i64;
|
final long o = index.getOffset(nth[mid]);
|
||||||
|
if (offset < o)
|
||||||
|
high = mid;
|
||||||
|
else if (offset == o)
|
||||||
|
return mid;
|
||||||
|
else
|
||||||
|
low = mid + 1;
|
||||||
}
|
}
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ObjectId findObjectByPosition(int nthPosition) {
|
ObjectId findObjectByPosition(int nthPosition) {
|
||||||
if (nthPosition < nth32.length)
|
return index.getObjectId(nth[nthPosition]);
|
||||||
return index.getObjectId(nth32[nthPosition]);
|
|
||||||
final int i64 = nthPosition - nth32.length;
|
|
||||||
return index.getObjectId(nth64[i64]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue