PackReverseIndex: separate out the computed implementation
PackReverseIndex is a concrete class whose implementation is computed from a pack's forward index. Callers that have a reverse index file may want to use a file-based implementation instead.

Generalize PackReverseIndex into an interface without implementation-specific logic and separate out the logic for the computed implementation into a new concrete class.

Change-Id: I98d9835363c5e1c8c3c11a81b0761af3cdeaa41a
Signed-off-by: Anna Papitto <annapapitto@google.com>
This commit is contained in:
parent
faefa90f99
commit
8e61971620
|
@ -23,7 +23,7 @@
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class PackReverseIndexTest extends RepositoryTestCase {
|
public class PackReverseIndexComputedTest extends RepositoryTestCase {
|
||||||
|
|
||||||
private PackIndex idx;
|
private PackIndex idx;
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
// index with both small (< 2^31) and big offsets
|
// index with both small (< 2^31) and big offsets
|
||||||
idx = PackIndex.open(JGitTestUtil.getTestResourceFile("pack-huge.idx"));
|
idx = PackIndex.open(JGitTestUtil.getTestResourceFile("pack-huge.idx"));
|
||||||
reverseIdx = PackReverseIndex.computeFromIndex(idx);
|
reverseIdx = PackReverseIndexFactory.computeFromIndex(idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -43,6 +43,7 @@
|
||||||
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
|
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
|
||||||
import org.eclipse.jgit.internal.storage.file.PackIndex;
|
import org.eclipse.jgit.internal.storage.file.PackIndex;
|
||||||
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
|
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
|
||||||
|
import org.eclipse.jgit.internal.storage.file.PackReverseIndexFactory;
|
||||||
import org.eclipse.jgit.internal.storage.pack.BinaryDelta;
|
import org.eclipse.jgit.internal.storage.pack.BinaryDelta;
|
||||||
import org.eclipse.jgit.internal.storage.pack.PackOutputStream;
|
import org.eclipse.jgit.internal.storage.pack.PackOutputStream;
|
||||||
import org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation;
|
import org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation;
|
||||||
|
@ -1068,7 +1069,7 @@ private DfsBlockCache.Ref<PackReverseIndex> loadReverseIdx(
|
||||||
DfsReader ctx, DfsStreamKey revKey, PackIndex idx) {
|
DfsReader ctx, DfsStreamKey revKey, PackIndex idx) {
|
||||||
ctx.stats.readReverseIdx++;
|
ctx.stats.readReverseIdx++;
|
||||||
long start = System.nanoTime();
|
long start = System.nanoTime();
|
||||||
PackReverseIndex revidx = PackReverseIndex.computeFromIndex(idx);
|
PackReverseIndex revidx = PackReverseIndexFactory.computeFromIndex(idx);
|
||||||
reverseIndex = revidx;
|
reverseIndex = revidx;
|
||||||
ctx.stats.readReverseIdxMicros += elapsedMicros(start);
|
ctx.stats.readReverseIdxMicros += elapsedMicros(start);
|
||||||
return new DfsBlockCache.Ref<>(
|
return new DfsBlockCache.Ref<>(
|
||||||
|
|
|
@ -1149,7 +1149,7 @@ synchronized PackBitmapIndex getBitmapIndex() throws IOException {
|
||||||
|
|
||||||
private synchronized PackReverseIndex getReverseIdx() throws IOException {
|
private synchronized PackReverseIndex getReverseIdx() throws IOException {
|
||||||
if (reverseIdx == null)
|
if (reverseIdx == null)
|
||||||
reverseIdx = PackReverseIndex.computeFromIndex(idx());
|
reverseIdx = PackReverseIndexFactory.computeFromIndex(idx());
|
||||||
return reverseIdx;
|
return reverseIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,11 +10,7 @@
|
||||||
|
|
||||||
package org.eclipse.jgit.internal.storage.file;
|
package org.eclipse.jgit.internal.storage.file;
|
||||||
|
|
||||||
import java.text.MessageFormat;
|
|
||||||
|
|
||||||
import org.eclipse.jgit.errors.CorruptObjectException;
|
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||||
import org.eclipse.jgit.internal.JGitText;
|
|
||||||
import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
|
|
||||||
import org.eclipse.jgit.lib.ObjectId;
|
import org.eclipse.jgit.lib.ObjectId;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -27,106 +23,7 @@
|
||||||
* @see PackIndex
|
* @see PackIndex
|
||||||
* @see Pack
|
* @see Pack
|
||||||
*/
|
*/
|
||||||
public class PackReverseIndex {
|
public interface PackReverseIndex {
|
||||||
/** Index we were created from, and that has our ObjectId data. */
|
|
||||||
private final PackIndex index;
|
|
||||||
|
|
||||||
/** The number of bytes per entry in the offsetIndex. */
|
|
||||||
private final long bucketSize;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An index into the nth mapping, where the value is the position after the
|
|
||||||
* last index that contains the values of the bucket. For example given
|
|
||||||
* offset o (and bucket = o / bucketSize), the offset will be contained in
|
|
||||||
* the range nth[offsetIndex[bucket - 1]] inclusive to
|
|
||||||
* nth[offsetIndex[bucket]] exclusive.
|
|
||||||
*
|
|
||||||
* See {@link #binarySearch}
|
|
||||||
*/
|
|
||||||
private final int[] offsetIndex;
|
|
||||||
|
|
||||||
/** Mapping from indices in offset order to indices in SHA-1 order. */
|
|
||||||
private final int[] nth;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute an in-memory pack reverse index from the in-memory pack forward
|
|
||||||
* index. This computation uses insertion sort, which has a quadratic
|
|
||||||
* runtime on average.
|
|
||||||
*
|
|
||||||
* @param packIndex
|
|
||||||
* the forward index to compute from
|
|
||||||
* @return the reverse index instance
|
|
||||||
*/
|
|
||||||
public static PackReverseIndex computeFromIndex(PackIndex packIndex) {
|
|
||||||
return new PackReverseIndex(packIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create reverse index from straight/forward pack index, by indexing all
|
|
||||||
* its entries.
|
|
||||||
*
|
|
||||||
* @param packIndex
|
|
||||||
* forward index - entries to (reverse) index.
|
|
||||||
*/
|
|
||||||
private PackReverseIndex(PackIndex packIndex) {
|
|
||||||
index = packIndex;
|
|
||||||
|
|
||||||
final long cnt = index.getObjectCount();
|
|
||||||
if (cnt + 1 > Integer.MAX_VALUE)
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
|
|
||||||
|
|
||||||
if (cnt == 0) {
|
|
||||||
bucketSize = Long.MAX_VALUE;
|
|
||||||
offsetIndex = new int[1];
|
|
||||||
nth = new int[0];
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final long[] offsetsBySha1 = new long[(int) cnt];
|
|
||||||
|
|
||||||
long maxOffset = 0;
|
|
||||||
int ith = 0;
|
|
||||||
for (MutableEntry me : index) {
|
|
||||||
final long o = me.getOffset();
|
|
||||||
offsetsBySha1[ith++] = o;
|
|
||||||
if (o > maxOffset)
|
|
||||||
maxOffset = o;
|
|
||||||
}
|
|
||||||
|
|
||||||
bucketSize = maxOffset / cnt + 1;
|
|
||||||
int[] bucketIndex = new int[(int) cnt];
|
|
||||||
int[] bucketValues = new int[(int) cnt + 1];
|
|
||||||
for (int oi = 0; oi < offsetsBySha1.length; oi++) {
|
|
||||||
final long o = offsetsBySha1[oi];
|
|
||||||
final int bucket = (int) (o / bucketSize);
|
|
||||||
final int bucketValuesPos = oi + 1;
|
|
||||||
final int current = bucketIndex[bucket];
|
|
||||||
bucketIndex[bucket] = bucketValuesPos;
|
|
||||||
bucketValues[bucketValuesPos] = current;
|
|
||||||
}
|
|
||||||
|
|
||||||
int nthByOffset = 0;
|
|
||||||
nth = new int[offsetsBySha1.length];
|
|
||||||
offsetIndex = bucketIndex; // Reuse the allocation
|
|
||||||
for (int bi = 0; bi < bucketIndex.length; bi++) {
|
|
||||||
final int start = nthByOffset;
|
|
||||||
// Insertion sort of the values in the bucket.
|
|
||||||
for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
|
|
||||||
final int nthBySha1 = vi - 1;
|
|
||||||
final long o = offsetsBySha1[nthBySha1];
|
|
||||||
int insertion = nthByOffset++;
|
|
||||||
for (; start < insertion; insertion--) {
|
|
||||||
if (o > offsetsBySha1[nth[insertion - 1]])
|
|
||||||
break;
|
|
||||||
nth[insertion] = nth[insertion - 1];
|
|
||||||
}
|
|
||||||
nth[insertion] = nthBySha1;
|
|
||||||
}
|
|
||||||
offsetIndex[bi] = nthByOffset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search for object id with the specified start offset in this pack
|
* Search for object id with the specified start offset in this pack
|
||||||
* (reverse) index.
|
* (reverse) index.
|
||||||
|
@ -135,12 +32,7 @@ private PackReverseIndex(PackIndex packIndex) {
|
||||||
* start offset of object to find.
|
* start offset of object to find.
|
||||||
* @return object id for this offset, or null if no object was found.
|
* @return object id for this offset, or null if no object was found.
|
||||||
*/
|
*/
|
||||||
public ObjectId findObject(long offset) {
|
ObjectId findObject(long offset);
|
||||||
final int ith = binarySearch(offset);
|
|
||||||
if (ith < 0)
|
|
||||||
return null;
|
|
||||||
return index.getObjectId(nth[ith]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search for the next offset to the specified offset in this pack (reverse)
|
* Search for the next offset to the specified offset in this pack (reverse)
|
||||||
|
@ -157,42 +49,25 @@ public ObjectId findObject(long offset) {
|
||||||
* @throws org.eclipse.jgit.errors.CorruptObjectException
|
* @throws org.eclipse.jgit.errors.CorruptObjectException
|
||||||
* when there is no object with the provided offset.
|
* when there is no object with the provided offset.
|
||||||
*/
|
*/
|
||||||
public long findNextOffset(long offset, long maxOffset)
|
long findNextOffset(long offset, long maxOffset)
|
||||||
throws CorruptObjectException {
|
throws CorruptObjectException;
|
||||||
final int ith = binarySearch(offset);
|
|
||||||
if (ith < 0)
|
|
||||||
throw new CorruptObjectException(
|
|
||||||
MessageFormat.format(
|
|
||||||
JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
|
|
||||||
Long.valueOf(offset)));
|
|
||||||
|
|
||||||
if (ith + 1 == nth.length)
|
/**
|
||||||
return maxOffset;
|
* Find the position in the primary index of the object at the given pack
|
||||||
return index.getOffset(nth[ith + 1]);
|
* offset.
|
||||||
}
|
*
|
||||||
|
* @param offset
|
||||||
|
* the pack offset of the object
|
||||||
|
* @return the position in the primary index of the object
|
||||||
|
*/
|
||||||
|
int findPosition(long offset);
|
||||||
|
|
||||||
int findPosition(long offset) {
|
/**
|
||||||
return binarySearch(offset);
|
* Find the object that is in the given position in the primary index.
|
||||||
}
|
*
|
||||||
|
* @param nthPosition
|
||||||
private int binarySearch(long offset) {
|
* the position of the object in the primary index
|
||||||
int bucket = (int) (offset / bucketSize);
|
* @return the object in that position
|
||||||
int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
|
*/
|
||||||
int high = offsetIndex[bucket];
|
ObjectId findObjectByPosition(int nthPosition);
|
||||||
while (low < high) {
|
|
||||||
final int mid = (low + high) >>> 1;
|
|
||||||
final long o = index.getOffset(nth[mid]);
|
|
||||||
if (offset < o)
|
|
||||||
high = mid;
|
|
||||||
else if (offset == o)
|
|
||||||
return mid;
|
|
||||||
else
|
|
||||||
low = mid + 1;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
ObjectId findObjectByPosition(int nthPosition) {
|
|
||||||
return index.getObjectId(nth[nthPosition]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,174 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023, Google LLC and others
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||||
|
* https://www.eclipse.org/org/documents/edl-v10.php.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
*/
|
||||||
|
package org.eclipse.jgit.internal.storage.file;
|
||||||
|
|
||||||
|
import java.text.MessageFormat;
|
||||||
|
|
||||||
|
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||||
|
import org.eclipse.jgit.internal.JGitText;
|
||||||
|
import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
|
||||||
|
import org.eclipse.jgit.lib.ObjectId;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverse index for forward pack index which is computed from the forward pack
|
||||||
|
* index.
|
||||||
|
* <p>
|
||||||
|
* Creating an instance uses an insertion sort of the entries in the forward
|
||||||
|
* index, so it runs in quadratic time on average.
|
||||||
|
*/
|
||||||
|
final class PackReverseIndexComputed implements PackReverseIndex {
|
||||||
|
/**
|
||||||
|
* Index we were created from, and that has our ObjectId data.
|
||||||
|
*/
|
||||||
|
private final PackIndex index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of bytes per entry in the offsetIndex.
|
||||||
|
*/
|
||||||
|
private final long bucketSize;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An index into the nth mapping, where the value is the position after the
|
||||||
|
* the last index that contains the values of the bucket. For example given
|
||||||
|
* offset o (and bucket = o / bucketSize), the offset will be contained in
|
||||||
|
* the range nth[offsetIndex[bucket - 1]] inclusive to
|
||||||
|
* nth[offsetIndex[bucket]] exclusive.
|
||||||
|
* <p>
|
||||||
|
* See {@link #binarySearch}
|
||||||
|
*/
|
||||||
|
private final int[] offsetIndex;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mapping from indices in offset order to indices in SHA-1 order.
|
||||||
|
*/
|
||||||
|
private final int[] nth;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create reverse index from straight/forward pack index, by indexing all
|
||||||
|
* its entries.
|
||||||
|
*
|
||||||
|
* @param packIndex
|
||||||
|
* forward index - entries to (reverse) index.
|
||||||
|
*/
|
||||||
|
PackReverseIndexComputed(PackIndex packIndex) {
|
||||||
|
index = packIndex;
|
||||||
|
|
||||||
|
final long cnt = index.getObjectCount();
|
||||||
|
if (cnt + 1 > Integer.MAX_VALUE) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cnt == 0) {
|
||||||
|
bucketSize = Long.MAX_VALUE;
|
||||||
|
offsetIndex = new int[1];
|
||||||
|
nth = new int[0];
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final long[] offsetsBySha1 = new long[(int) cnt];
|
||||||
|
|
||||||
|
long maxOffset = 0;
|
||||||
|
int ith = 0;
|
||||||
|
for (MutableEntry me : index) {
|
||||||
|
final long o = me.getOffset();
|
||||||
|
offsetsBySha1[ith++] = o;
|
||||||
|
if (o > maxOffset) {
|
||||||
|
maxOffset = o;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bucketSize = maxOffset / cnt + 1;
|
||||||
|
int[] bucketIndex = new int[(int) cnt];
|
||||||
|
int[] bucketValues = new int[(int) cnt + 1];
|
||||||
|
for (int oi = 0; oi < offsetsBySha1.length; oi++) {
|
||||||
|
final long o = offsetsBySha1[oi];
|
||||||
|
final int bucket = (int) (o / bucketSize);
|
||||||
|
final int bucketValuesPos = oi + 1;
|
||||||
|
final int current = bucketIndex[bucket];
|
||||||
|
bucketIndex[bucket] = bucketValuesPos;
|
||||||
|
bucketValues[bucketValuesPos] = current;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nthByOffset = 0;
|
||||||
|
nth = new int[offsetsBySha1.length];
|
||||||
|
offsetIndex = bucketIndex; // Reuse the allocation
|
||||||
|
for (int bi = 0; bi < bucketIndex.length; bi++) {
|
||||||
|
final int start = nthByOffset;
|
||||||
|
// Insertion sort of the values in the bucket.
|
||||||
|
for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
|
||||||
|
final int nthBySha1 = vi - 1;
|
||||||
|
final long o = offsetsBySha1[nthBySha1];
|
||||||
|
int insertion = nthByOffset++;
|
||||||
|
for (; start < insertion; insertion--) {
|
||||||
|
if (o > offsetsBySha1[nth[insertion - 1]]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
nth[insertion] = nth[insertion - 1];
|
||||||
|
}
|
||||||
|
nth[insertion] = nthBySha1;
|
||||||
|
}
|
||||||
|
offsetIndex[bi] = nthByOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ObjectId findObject(long offset) {
|
||||||
|
final int ith = binarySearch(offset);
|
||||||
|
if (ith < 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return index.getObjectId(nth[ith]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long findNextOffset(long offset, long maxOffset)
|
||||||
|
throws CorruptObjectException {
|
||||||
|
final int ith = binarySearch(offset);
|
||||||
|
if (ith < 0) {
|
||||||
|
throw new CorruptObjectException(MessageFormat.format(JGitText
|
||||||
|
.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
|
||||||
|
Long.valueOf(offset)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ith + 1 == nth.length) {
|
||||||
|
return maxOffset;
|
||||||
|
}
|
||||||
|
return index.getOffset(nth[ith + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int findPosition(long offset) {
|
||||||
|
return binarySearch(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int binarySearch(long offset) {
|
||||||
|
int bucket = (int) (offset / bucketSize);
|
||||||
|
int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
|
||||||
|
int high = offsetIndex[bucket];
|
||||||
|
while (low < high) {
|
||||||
|
final int mid = (low + high) >>> 1;
|
||||||
|
final long o = index.getOffset(nth[mid]);
|
||||||
|
if (offset < o) {
|
||||||
|
high = mid;
|
||||||
|
} else if (offset == o) {
|
||||||
|
return mid;
|
||||||
|
} else {
|
||||||
|
low = mid + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ObjectId findObjectByPosition(int nthPosition) {
|
||||||
|
return index.getObjectId(nth[nthPosition]);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023, Google LLC and others
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||||
|
* https://www.eclipse.org/org/documents/edl-v10.php.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.eclipse.jgit.internal.storage.file;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for creating instances of {@link PackReverseIndex}.
|
||||||
|
*/
|
||||||
|
public final class PackReverseIndexFactory {
|
||||||
|
/**
|
||||||
|
* Compute an in-memory pack reverse index from the in-memory pack forward
|
||||||
|
* index. This computation uses insertion sort, which has a quadratic
|
||||||
|
* runtime on average.
|
||||||
|
*
|
||||||
|
* @param packIndex
|
||||||
|
* the forward index to compute from
|
||||||
|
* @return the reverse index instance
|
||||||
|
*/
|
||||||
|
public static PackReverseIndex computeFromIndex(PackIndex packIndex) {
|
||||||
|
return new PackReverseIndexComputed(packIndex);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue