Merge changes I60a92463,Ic3b68220

* changes:
  PackReverseIndexV1: reverse index parsed from version 1 file
  ComputedPackReverseIndex: Clarify custom bucket sort algorithm
This commit is contained in:
Jonathan Tan 2023-07-21 14:05:38 -04:00 committed by Gerrit Code Review @ Eclipse.org
commit 0f4af2bc36
11 changed files with 747 additions and 59 deletions

View File

@ -0,0 +1,56 @@
/*
* Copyright (C) 2022, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import static org.junit.Assert.assertThrows;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.junit.Test;
public class PackReverseIndexTest {
@Test
public void read_badMagic() {
byte[] badMagic = new byte[] { 'R', 'B', 'A', 'D', // magic
0x00, 0x00, 0x00, 0x01, // file version
0x00, 0x00, 0x00, 0x01, // oid version
// pack checksum
'P', 'A', 'C', 'K', 'C', 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3',
'4', '5', '6', '7', '8', '9', '0',
// checksum
0x66, 0x01, (byte) 0xbc, (byte) 0xe8, 0x51, 0x4b, 0x2f,
(byte) 0xa1, (byte) 0xa9, (byte) 0xcd, (byte) 0xbe, (byte) 0xd6,
0x4f, (byte) 0xa8, 0x7d, (byte) 0xab, 0x50, (byte) 0xa3,
(byte) 0xf7, (byte) 0xcc, };
ByteArrayInputStream in = new ByteArrayInputStream(badMagic);
assertThrows(IOException.class,
() -> PackReverseIndexFactory.readFromFile(in, 0, () -> null));
}
@Test
public void read_unsupportedVersion2() {
byte[] version2 = new byte[] { 'R', 'I', 'D', 'X', // magic
0x00, 0x00, 0x00, 0x02, // file version
0x00, 0x00, 0x00, 0x01, // oid version
// pack checksum
'P', 'A', 'C', 'K', 'C', 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3',
'4', '5', '6', '7', '8', '9', '0',
// checksum
0x70, 0x17, 0x10, 0x51, (byte) 0xfe, (byte) 0xab, (byte) 0x9b,
0x68, (byte) 0xed, 0x3a, 0x3f, 0x27, 0x1d, (byte) 0xce,
(byte) 0xff, 0x38, 0x09, (byte) 0x9b, 0x29, 0x58, };
ByteArrayInputStream in = new ByteArrayInputStream(version2);
assertThrows(IOException.class,
() -> PackReverseIndexFactory.readFromFile(in, 0, () -> null));
}
}

View File

@ -0,0 +1,246 @@
/*
* Copyright (C) 2022, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThrows;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.junit.JGitTestUtil;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.junit.Before;
import org.junit.Test;
public class PackReverseIndexV1Test {
private static final byte[] FAKE_PACK_CHECKSUM = new byte[] { 'P', 'A', 'C',
'K', 'C', 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6',
'7', '8', '9', '0', };
private static final byte[] NO_OBJECTS = new byte[] { 'R', 'I', 'D', 'X', // magic
0x00, 0x00, 0x00, 0x01, // file version
0x00, 0x00, 0x00, 0x01, // oid version
// pack checksum to copy into at byte 12
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// checksum
(byte) 0xd1, 0x1d, 0x17, (byte) 0xd5, (byte) 0xa1, 0x5c,
(byte) 0x8f, 0x45, 0x7e, 0x06, (byte) 0x91, (byte) 0xf2, 0x7e, 0x20,
0x35, 0x2c, (byte) 0xdc, 0x4c, 0x46, (byte) 0xe4, };
private static final byte[] SMALL_PACK_CHECKSUM = new byte[] { (byte) 0xbb,
0x1d, 0x25, 0x3d, (byte) 0xd3, (byte) 0xf0, 0x08, 0x75, (byte) 0xc8,
0x04, (byte) 0xd0, 0x6f, 0x73, (byte) 0xe9, 0x00, (byte) 0x82,
(byte) 0xdb, 0x09, (byte) 0xc8, 0x13, };
private static final byte[] SMALL_CONTENTS = new byte[] { 'R', 'I', 'D',
'X', // magic
0x00, 0x00, 0x00, 0x01, // file version
0x00, 0x00, 0x00, 0x01, // oid version
0x00, 0x00, 0x00, 0x04, // offset 12: "68" -> index @ 4
0x00, 0x00, 0x00, 0x02, // offset 165: "5c" -> index @ 2
0x00, 0x00, 0x00, 0x03, // offset 257: "62" -> index @ 3
0x00, 0x00, 0x00, 0x01, // offset 450: "58" -> index @ 1
0x00, 0x00, 0x00, 0x05, // offset 556: "c5" -> index @ 5
0x00, 0x00, 0x00, 0x00, // offset 614: "2d" -> index @ 0
// pack checksum to copy into at byte 36
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// checksum
(byte) 0xf0, 0x6d, 0x03, (byte) 0xd7, 0x6f, (byte) 0x9f,
(byte) 0xc1, 0x36, 0x26, (byte) 0xbc, (byte) 0xcb, 0x75, 0x36,
(byte) 0xa1, 0x26, 0x6a, 0x2b, (byte) 0x84, 0x16, (byte) 0x83, };
private PackReverseIndex emptyReverseIndex;
/**
* Reverse index for the pack-cbdeda40019ae0e6e789088ea0f51f164f489d14.idx
* with contents `SHA-1 type size size-in-packfile offset-in-packfile` as
* shown by `verify-pack`:
* 2d04ee74dba30078c2dcdb713ddb8be4bc084d76 blob 8 17 614
* 58728c938a9a8b9970cc09236caf94ada4689923 blob 140 106 450
* 5ce00008cf3fb8f194f52742020bd40d78f3f1b3 commit 81 92 165 1 68cb1f232964f3cd698afc1dafe583937203c587
* 62299a7ae290d685196e948a2fcb7d8c07f95c7d tree 198 193 257
* 68cb1f232964f3cd698afc1dafe583937203c587 commit 220 153 12
* c5ab27309491cf641eb11bb4b7a78641f280b482 tree 46 58 556 1 62299a7ae290d685196e948a2fcb7d8c07f95c7d
*/
private PackReverseIndex smallReverseIndex;
private final PackedObjectInfo object614 = objectInfo(
"2d04ee74dba30078c2dcdb713ddb8be4bc084d76", OBJ_BLOB, 614);
private final PackedObjectInfo object450 = objectInfo(
"58728c938a9a8b9970cc09236caf94ada4689923", OBJ_BLOB, 450);
private final PackedObjectInfo object165 = objectInfo(
"5ce00008cf3fb8f194f52742020bd40d78f3f1b3", OBJ_COMMIT, 165);
private final PackedObjectInfo object257 = objectInfo(
"62299a7ae290d685196e948a2fcb7d8c07f95c7d", OBJ_TREE, 257);
private final PackedObjectInfo object12 = objectInfo(
"68cb1f232964f3cd698afc1dafe583937203c587", OBJ_COMMIT, 12);
private final PackedObjectInfo object556 = objectInfo(
"c5ab27309491cf641eb11bb4b7a78641f280b482", OBJ_TREE, 556);
// last object's offset + last object's length
private final long smallMaxOffset = 631;
@Before
public void setUp() throws Exception {
System.arraycopy(SMALL_PACK_CHECKSUM, 0, SMALL_CONTENTS, 36,
SMALL_PACK_CHECKSUM.length);
ByteArrayInputStream smallIn = new ByteArrayInputStream(SMALL_CONTENTS);
smallReverseIndex = PackReverseIndexFactory.readFromFile(smallIn, 6,
() -> PackIndex.open(JGitTestUtil.getTestResourceFile(
"pack-cbdeda40019ae0e6e789088ea0f51f164f489d14.idx")));
System.arraycopy(FAKE_PACK_CHECKSUM, 0, NO_OBJECTS, 12,
FAKE_PACK_CHECKSUM.length);
ByteArrayInputStream emptyIn = new ByteArrayInputStream(NO_OBJECTS);
emptyReverseIndex = PackReverseIndexFactory.readFromFile(emptyIn, 0,
() -> null);
}
@Test
public void read_unsupportedOidSHA256() {
byte[] version2 = new byte[] { 'R', 'I', 'D', 'X', // magic
0x00, 0x00, 0x00, 0x01, // file version
0x00, 0x00, 0x00, 0x02, // oid version
// pack checksum to copy into at byte 12
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// checksum
0x6e, 0x78, 0x75, 0x67, (byte) 0x84, (byte) 0x89, (byte) 0xde,
(byte) 0xe3, (byte) 0x86, 0x6a, 0x3b, (byte) 0x98, 0x51,
(byte) 0xd8, (byte) 0x8c, (byte) 0xec, 0x50, (byte) 0xe7,
(byte) 0xfb, 0x22, };
System.arraycopy(FAKE_PACK_CHECKSUM, 0, version2, 12,
FAKE_PACK_CHECKSUM.length);
ByteArrayInputStream in = new ByteArrayInputStream(version2);
assertThrows(IOException.class,
() -> PackReverseIndexFactory.readFromFile(in, 0, () -> null));
}
@Test
public void read_objectCountTooLarge() {
ByteArrayInputStream dummyInput = new ByteArrayInputStream(NO_OBJECTS);
long biggerThanInt = ((long) Integer.MAX_VALUE) + 1;
assertThrows(IllegalArgumentException.class,
() -> PackReverseIndexFactory.readFromFile(dummyInput,
biggerThanInt,
() -> null));
}
@Test
public void findObject_noObjects() {
assertNull(emptyReverseIndex.findObject(0));
}
@Test
public void findObject_multipleObjects() {
assertEquals(object614, smallReverseIndex.findObject(614));
assertEquals(object450, smallReverseIndex.findObject(450));
assertEquals(object165, smallReverseIndex.findObject(165));
assertEquals(object257, smallReverseIndex.findObject(257));
assertEquals(object12, smallReverseIndex.findObject(12));
assertEquals(object556, smallReverseIndex.findObject(556));
}
@Test
public void findObject_badOffset() {
assertNull(smallReverseIndex.findObject(0));
}
@Test
public void findNextOffset_noObjects() {
assertThrows(IOException.class,
() -> emptyReverseIndex.findNextOffset(0, Long.MAX_VALUE));
}
@Test
public void findNextOffset_multipleObjects() throws CorruptObjectException {
assertEquals(smallMaxOffset,
smallReverseIndex.findNextOffset(614, smallMaxOffset));
assertEquals(614,
smallReverseIndex.findNextOffset(556, smallMaxOffset));
assertEquals(556,
smallReverseIndex.findNextOffset(450, smallMaxOffset));
assertEquals(450,
smallReverseIndex.findNextOffset(257, smallMaxOffset));
assertEquals(257,
smallReverseIndex.findNextOffset(165, smallMaxOffset));
assertEquals(165, smallReverseIndex.findNextOffset(12, smallMaxOffset));
}
@Test
public void findNextOffset_badOffset() {
assertThrows(IOException.class,
() -> smallReverseIndex.findNextOffset(0, Long.MAX_VALUE));
}
@Test
public void findPosition_noObjects() {
assertEquals(-1, emptyReverseIndex.findPosition(0));
}
@Test
public void findPosition_multipleObjects() {
assertEquals(0, smallReverseIndex.findPosition(12));
assertEquals(1, smallReverseIndex.findPosition(165));
assertEquals(2, smallReverseIndex.findPosition(257));
assertEquals(3, smallReverseIndex.findPosition(450));
assertEquals(4, smallReverseIndex.findPosition(556));
assertEquals(5, smallReverseIndex.findPosition(614));
}
@Test
public void findPosition_badOffset() {
assertEquals(-1, smallReverseIndex.findPosition(10));
}
@Test
public void findObjectByPosition_noObjects() {
assertThrows(AssertionError.class,
() -> emptyReverseIndex.findObjectByPosition(0));
}
@Test
public void findObjectByPosition_multipleObjects() {
assertEquals(object12, smallReverseIndex.findObjectByPosition(0));
assertEquals(object165, smallReverseIndex.findObjectByPosition(1));
assertEquals(object257, smallReverseIndex.findObjectByPosition(2));
assertEquals(object450, smallReverseIndex.findObjectByPosition(3));
assertEquals(object556, smallReverseIndex.findObjectByPosition(4));
assertEquals(object614, smallReverseIndex.findObjectByPosition(5));
}
@Test
public void findObjectByPosition_badOffset() {
assertThrows(AssertionError.class,
() -> smallReverseIndex.findObjectByPosition(10));
}
private static PackedObjectInfo objectInfo(String objectId, int type,
long offset) {
PackedObjectInfo objectInfo = new PackedObjectInfo(
ObjectId.fromString(objectId));
objectInfo.setType(type);
objectInfo.setOffset(offset);
return objectInfo;
}
}

View File

@ -0,0 +1,119 @@
/*
* Copyright (C) 2022, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.transport.PackedObjectInfo;
import org.junit.Test;
public class PackReverseIndexV1WriteReadTest {
private static byte[] PACK_CHECKSUM = new byte[] { 'P', 'A', 'C', 'K', 'C',
'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6', '7', '8',
'9', '0', };
@Test
public void writeThenRead_noObjects() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
1);
List<PackedObjectInfo> objectsSortedByName = new ArrayList<>();
// write
writer.write(objectsSortedByName, PACK_CHECKSUM);
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
// read
PackReverseIndex noObjectsReverseIndex = PackReverseIndexFactory
.readFromFile(in, 0, () -> null);
// use
assertThrows(AssertionError.class,
() -> noObjectsReverseIndex.findObjectByPosition(0));
}
@Test
public void writeThenRead_oneObject() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
1);
PackedObjectInfo a = objectInfo("a", OBJ_COMMIT, 0);
List<PackedObjectInfo> objectsSortedByName = List.of(a);
// write
writer.write(objectsSortedByName, PACK_CHECKSUM);
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
PackIndex mockForwardIndex = mock(PackIndex.class);
when(mockForwardIndex.getObjectId(0)).thenReturn(a);
// read
PackReverseIndex oneObjectReverseIndex = PackReverseIndexFactory
.readFromFile(in, 1, () -> mockForwardIndex);
// use
assertEquals(a, oneObjectReverseIndex.findObjectByPosition(0));
}
@Test
public void writeThenRead_multipleObjectsLargeOffsets() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
1);
PackedObjectInfo a = objectInfo("a", OBJ_BLOB, 200000000);
PackedObjectInfo b = objectInfo("b", OBJ_COMMIT, 0);
PackedObjectInfo c = objectInfo("c", OBJ_COMMIT, 52000000000L);
PackedObjectInfo d = objectInfo("d", OBJ_TREE, 7);
PackedObjectInfo e = objectInfo("e", OBJ_COMMIT, 38000000000L);
List<PackedObjectInfo> objectsSortedByName = List.of(a, b, c, d, e);
writer.write(objectsSortedByName, PACK_CHECKSUM);
// write
writer.write(objectsSortedByName, PACK_CHECKSUM);
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
PackIndex mockForwardIndex = mock(PackIndex.class);
when(mockForwardIndex.getObjectId(4)).thenReturn(e);
// read
PackReverseIndex multipleObjectsReverseIndex = PackReverseIndexFactory
.readFromFile(in, 5, () -> mockForwardIndex);
// use with minimal mocked forward index use
assertEquals(e, multipleObjectsReverseIndex.findObjectByPosition(3));
}
private static PackedObjectInfo objectInfo(String objectId, int type,
long offset) {
assert (objectId.length() == 1);
PackedObjectInfo objectInfo = new PackedObjectInfo(
ObjectId.fromString(objectId.repeat(OBJECT_ID_STRING_LENGTH)));
objectInfo.setType(type);
objectInfo.setOffset(offset);
return objectInfo;
}
}

View File

@ -689,6 +689,7 @@ searchForReachableBranches=Finding reachable branches
saveFileStoreAttributesFailed=Saving measured FileStore attributes to user config failed
searchForReuse=Finding sources
searchForReuseTimeout=Search for reuse timed out after {0} seconds
unsupportedObjectIdVersion=Object id version {0} is not supported
searchForSizes=Getting sizes
secondsAgo={0} seconds ago
selectingCommits=Selecting commits

View File

@ -866,6 +866,7 @@ public static JGitText get() {
/***/ public String unsupportedEncryptionVersion;
/***/ public String unsupportedGC;
/***/ public String unsupportedMark;
/***/ public String unsupportedObjectIdVersion;
/***/ public String unsupportedOperationNotAddAtEnd;
/***/ public String unsupportedPackIndexVersion;
/***/ public String unsupportedPackReverseIndexVersion;

View File

@ -24,6 +24,16 @@
* @see Pack
*/
public interface PackReverseIndex {
/**
* Magic bytes that uniquely identify git reverse index files.
*/
byte[] MAGIC = { 'R', 'I', 'D', 'X' };
/**
* The first reverse index file version.
*/
int VERSION_1 = 1;
/**
* Search for object id with the specified start offset in this pack
* (reverse) index.

View File

@ -30,25 +30,30 @@ final class PackReverseIndexComputed implements PackReverseIndex {
private final PackIndex index;
/**
* The number of bytes per entry in the offsetIndex.
* The difference in offset between the start of an offset bucket and the
* start of its succeeding bucket.
*/
private final long bucketSize;
/**
* An index into the nth mapping, where the value is the position after the
* the last index that contains the values of the bucket. For example given
* offset o (and bucket = o / bucketSize), the offset will be contained in
* the range nth[offsetIndex[bucket - 1]] inclusive to
* nth[offsetIndex[bucket]] exclusive.
* The indexes into indexPosInOffsetOrder at which the next bucket starts.
* <p>
* For example, given offset o (and therefore bucket = o / bucketSize), the
* indexPos corresponding to o will be contained in the range
* indexPosInOffsetOrder[nextBucketStart[bucket - 1]] inclusive to
* indexPosInOffsetOrder[nextBucketStart[bucket]] exclusive.
* <p>
* This range information can speed up #binarySearch by identifying the
* relevant bucket and only searching within its range.
* <p>
* See {@link #binarySearch}
*/
private final int[] offsetIndex;
private final int[] nextBucketStart;
/**
* Mapping from indices in offset order to indices in SHA-1 order.
*/
private final int[] nth;
private final int[] indexPosInOffsetOrder;
/**
* Create reverse index from straight/forward pack index, by indexing all
@ -60,62 +65,81 @@ final class PackReverseIndexComputed implements PackReverseIndex {
PackReverseIndexComputed(PackIndex packIndex) {
index = packIndex;
final long cnt = index.getObjectCount();
if (cnt + 1 > Integer.MAX_VALUE) {
long rawCnt = index.getObjectCount();
if (rawCnt + 1 > Integer.MAX_VALUE) {
throw new IllegalArgumentException(
JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
}
int cnt = (int) rawCnt;
if (cnt == 0) {
bucketSize = Long.MAX_VALUE;
offsetIndex = new int[1];
nth = new int[0];
nextBucketStart = new int[1];
indexPosInOffsetOrder = new int[0];
return;
}
final long[] offsetsBySha1 = new long[(int) cnt];
// Sort the index positions according to the corresponding pack offsets.
// Use bucket sort since the offsets are somewhat uniformly distributed
// over the range (0, pack size).
long[] offsetsInIndexOrder = new long[cnt];
long maxOffset = 0;
int ith = 0;
for (MutableEntry me : index) {
final long o = me.getOffset();
offsetsBySha1[ith++] = o;
if (o > maxOffset) {
maxOffset = o;
int i = 0;
for (MutableEntry entry : index) {
long offset = entry.getOffset();
offsetsInIndexOrder[i++] = offset;
if (offset > maxOffset) {
maxOffset = offset;
}
}
bucketSize = maxOffset / cnt + 1;
int[] bucketIndex = new int[(int) cnt];
int[] bucketValues = new int[(int) cnt + 1];
for (int oi = 0; oi < offsetsBySha1.length; oi++) {
final long o = offsetsBySha1[oi];
final int bucket = (int) (o / bucketSize);
final int bucketValuesPos = oi + 1;
final int current = bucketIndex[bucket];
bucketIndex[bucket] = bucketValuesPos;
bucketValues[bucketValuesPos] = current;
// The values in each bucket, stored as a linked list. Given a bucket,
// headValues[bucket] contains the first value,
// furtherValues[headValues[bucket]] contains the second,
// furtherValues[furtherValues[headValues[bucket]]] the third, and so
// on. The linked list stops when a value is 0. The values themselves
// are shifted index positions. There won't be any
// collisions because every index position is unique.
int[] headValues = new int[cnt];
int[] furtherValues = new int[cnt + 1];
for (int indexPos = 0; indexPos < cnt; indexPos++) {
// The offset determines which bucket this index position falls
// into, since the goal is sort into offset order.
long offset = offsetsInIndexOrder[indexPos];
int bucket = (int) (offset / bucketSize);
// Store the index positions as 1-indexed so that default
// initialized value 0 can be interpreted as the end of the bucket
// values.
int asBucketValue = indexPos + 1;
// If there is an existing value in this bucket, push the value to
// the front of the linked list.
int current = headValues[bucket];
headValues[bucket] = asBucketValue;
furtherValues[asBucketValue] = current;
}
int nthByOffset = 0;
nth = new int[offsetsBySha1.length];
offsetIndex = bucketIndex; // Reuse the allocation
for (int bi = 0; bi < bucketIndex.length; bi++) {
final int start = nthByOffset;
indexPosInOffsetOrder = new int[cnt];
nextBucketStart = headValues; // Reuse the allocation
for (int bi = 0; bi < headValues.length; bi++) {
// Insertion sort of the values in the bucket.
for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
final int nthBySha1 = vi - 1;
final long o = offsetsBySha1[nthBySha1];
int start = nthByOffset;
for (int vi = headValues[bi]; vi > 0; vi = furtherValues[vi]) {
int nthBySha1 = vi - 1;
long o = offsetsInIndexOrder[nthBySha1];
int insertion = nthByOffset++;
for (; start < insertion; insertion--) {
if (o > offsetsBySha1[nth[insertion - 1]]) {
if (o > offsetsInIndexOrder[indexPosInOffsetOrder[insertion
- 1]]) {
break;
}
nth[insertion] = nth[insertion - 1];
indexPosInOffsetOrder[insertion] = indexPosInOffsetOrder[insertion
- 1];
}
nth[insertion] = nthBySha1;
indexPosInOffsetOrder[insertion] = nthBySha1;
}
offsetIndex[bi] = nthByOffset;
nextBucketStart[bi] = nthByOffset;
}
}
@ -125,7 +149,7 @@ public ObjectId findObject(long offset) {
if (ith < 0) {
return null;
}
return index.getObjectId(nth[ith]);
return index.getObjectId(indexPosInOffsetOrder[ith]);
}
@Override
@ -138,10 +162,10 @@ public long findNextOffset(long offset, long maxOffset)
Long.valueOf(offset)));
}
if (ith + 1 == nth.length) {
if (ith + 1 == indexPosInOffsetOrder.length) {
return maxOffset;
}
return index.getOffset(nth[ith + 1]);
return index.getOffset(indexPosInOffsetOrder[ith + 1]);
}
@Override
@ -151,11 +175,11 @@ public int findPosition(long offset) {
private int binarySearch(long offset) {
int bucket = (int) (offset / bucketSize);
int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
int high = offsetIndex[bucket];
int low = bucket == 0 ? 0 : nextBucketStart[bucket - 1];
int high = nextBucketStart[bucket];
while (low < high) {
final int mid = (low + high) >>> 1;
final long o = index.getOffset(nth[mid]);
final long o = index.getOffset(indexPosInOffsetOrder[mid]);
if (offset < o) {
high = mid;
} else if (offset == o) {
@ -169,6 +193,6 @@ private int binarySearch(long offset) {
@Override
public ObjectId findObjectByPosition(int nthPosition) {
return index.getObjectId(nth[nthPosition]);
return index.getObjectId(indexPosInOffsetOrder[nthPosition]);
}
}

View File

@ -10,6 +10,20 @@
package org.eclipse.jgit.internal.storage.file;
import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.MAGIC;
import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.VERSION_1;
import java.io.DataInput;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.text.MessageFormat;
import java.util.Arrays;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.util.IO;
/**
* Factory for creating instances of {@link PackReverseIndex}.
*/
@ -26,4 +40,45 @@ public final class PackReverseIndexFactory {
public static PackReverseIndex computeFromIndex(PackIndex packIndex) {
return new PackReverseIndexComputed(packIndex);
}
/**
* Read an in-memory pack reverse index from the given input stream. This
* has a linear runtime.
*
* @param src
* the input stream to read the contents from
* @param objectCount
* the number of objects in the corresponding pack
* @param packIndexSupplier
* a function to lazily get the corresponding forward index
* @return the reverse index instance
* @throws IOException
* if reading from the input stream fails
*/
static PackReverseIndex readFromFile(InputStream src, long objectCount,
PackBitmapIndex.SupplierWithIOException<PackIndex> packIndexSupplier)
throws IOException {
final DigestInputStream digestIn = new DigestInputStream(src,
Constants.newMessageDigest());
final byte[] magic = new byte[MAGIC.length];
IO.readFully(digestIn, magic);
if (!Arrays.equals(magic, MAGIC)) {
throw new IOException(
MessageFormat.format(JGitText.get().expectedGot,
Arrays.toString(MAGIC), Arrays.toString(magic)));
}
DataInput dataIn = new SimpleDataInput(digestIn);
int version = dataIn.readInt();
switch (version) {
case VERSION_1:
return new PackReverseIndexV1(digestIn, objectCount,
packIndexSupplier);
default:
throw new IOException(MessageFormat.format(
JGitText.get().unsupportedPackReverseIndexVersion,
String.valueOf(version)));
}
}
}

View File

@ -0,0 +1,182 @@
/*
* Copyright (C) 2022, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import java.io.DataInput;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.security.DigestInputStream;
import java.text.MessageFormat;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.util.IO;
/**
* Reverse index for forward pack index which is parsed from a version 1 file.
* <p>
* The file format is specified at
* https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
*/
final class PackReverseIndexV1 implements PackReverseIndex {
static final int OID_VERSION_SHA1 = 1;
static final int OID_VERSION_SHA256 = 2;
private static final int SHA1_BYTES = OBJECT_ID_LENGTH;
private final DigestInputStream inputStream;
private final DataInput dataIn;
/**
* A lazy supplier for the corresponding PackIndex. The PackIndex is not
* needed during instantiation and parsing, only later when querying the
* reverse index. Allow lazy loading so that the parsing of the forward and
* reverse indices could happen in parallel.
*/
private final PackBitmapIndex.SupplierWithIOException<PackIndex> packIndexSupplier;
private int objectCount;
private byte[] packChecksum;
private int[] indexPositionsSortedByOffset;
private PackIndex packIndex;
PackReverseIndexV1(DigestInputStream inputStream, long objectCount,
PackBitmapIndex.SupplierWithIOException<PackIndex> packIndexSupplier)
throws IOException {
try {
this.objectCount = Math.toIntExact(objectCount);
} catch (ArithmeticException e) {
throw new IllegalArgumentException(
JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
}
this.inputStream = inputStream;
dataIn = new SimpleDataInput(inputStream);
int oid_version = dataIn.readInt();
switch (oid_version) {
case OID_VERSION_SHA1:
// JGit Pack only supports AnyObjectId, which represents SHA1.
break;
case OID_VERSION_SHA256:
throw new IOException(MessageFormat.format(
JGitText.get().unsupportedObjectIdVersion, "SHA256")); //$NON-NLS-1$
default:
throw new IOException(MessageFormat.format(
JGitText.get().unsupportedObjectIdVersion,
String.valueOf(oid_version)));
}
indexPositionsSortedByOffset = new int[this.objectCount];
this.packIndexSupplier = packIndexSupplier;
parseBody();
parseChecksums();
}
private void parseBody() throws IOException {
for (int i = 0; i < objectCount; i++) {
indexPositionsSortedByOffset[i] = dataIn.readInt();
}
}
private void parseChecksums() throws IOException {
packChecksum = new byte[SHA1_BYTES];
IO.readFully(inputStream, packChecksum);
// TODO: verify checksum
byte[] readSelfChecksum = new byte[SHA1_BYTES];
IO.readFully(inputStream, readSelfChecksum);
}
@Override
public ObjectId findObject(long offset) {
int reversePosition = findPosition(offset);
if (reversePosition < 0) {
return null;
}
int forwardPosition = findForwardPositionByReversePosition(
reversePosition);
return getPackIndex().getObjectId(forwardPosition);
}
@Override
public long findNextOffset(long offset, long maxOffset)
throws CorruptObjectException {
int position = findPosition(offset);
if (position < 0) {
throw new CorruptObjectException(MessageFormat.format(JGitText
.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
Long.valueOf(offset)));
}
if (position + 1 == objectCount) {
return maxOffset;
}
return findOffsetByReversePosition(position + 1);
}
@Override
public int findPosition(long offset) {
return binarySearchByOffset(offset);
}
@Override
public ObjectId findObjectByPosition(int position) {
return getPackIndex()
.getObjectId(findForwardPositionByReversePosition(position));
}
private long findOffsetByReversePosition(int position) {
return getPackIndex()
.getOffset(findForwardPositionByReversePosition(position));
}
private int findForwardPositionByReversePosition(int reversePosition) {
assert (reversePosition >= 0);
assert (reversePosition < indexPositionsSortedByOffset.length);
return indexPositionsSortedByOffset[reversePosition];
}
private int binarySearchByOffset(long wantedOffset) {
int low = 0;
int high = objectCount - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
long offsetAtMid = findOffsetByReversePosition(mid);
if (offsetAtMid == wantedOffset) {
return mid;
} else if (offsetAtMid > wantedOffset) {
high = mid - 1;
} else {
low = mid + 1;
}
}
return -1;
}
private PackIndex getPackIndex() {
if (packIndex == null) {
try {
packIndex = packIndexSupplier.get();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
return packIndex;
}
}

View File

@ -9,6 +9,8 @@
*/
package org.eclipse.jgit.internal.storage.file;
import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.VERSION_1;
import java.io.BufferedOutputStream;
import java.io.DataOutput;
import java.io.IOException;
@ -28,16 +30,6 @@
* https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
*/
public abstract class PackReverseIndexWriter {
/**
* Magic bytes that uniquely identify git reverse index files.
*/
protected static byte[] MAGIC = { 'R', 'I', 'D', 'X' };
/**
* The first reverse index file version.
*/
protected static final int VERSION_1 = 1;
/**
* Stream to write contents to while maintaining a checksum.
*/

View File

@ -9,6 +9,10 @@
*/
package org.eclipse.jgit.internal.storage.file;
import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.MAGIC;
import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.VERSION_1;
import static org.eclipse.jgit.internal.storage.file.PackReverseIndexV1.OID_VERSION_SHA1;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
@ -24,8 +28,6 @@
* https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
*/
final class PackReverseIndexWriterV1 extends PackReverseIndexWriter {
private static final int OID_VERSION_SHA1 = 1;
private static final int DEFAULT_OID_VERSION = OID_VERSION_SHA1;
PackReverseIndexWriterV1(final OutputStream dst) {