diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexTest.java new file mode 100644 index 000000000..84fc58e05 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexTest.java @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.junit.Assert.assertThrows; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import org.junit.Test; + +public class PackReverseIndexTest { + @Test + public void read_badMagic() { + byte[] badMagic = new byte[] { 'R', 'B', 'A', 'D', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + // pack checksum + 'P', 'A', 'C', 'K', 'C', 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', + '4', '5', '6', '7', '8', '9', '0', + // checksum + 0x66, 0x01, (byte) 0xbc, (byte) 0xe8, 0x51, 0x4b, 0x2f, + (byte) 0xa1, (byte) 0xa9, (byte) 0xcd, (byte) 0xbe, (byte) 0xd6, + 0x4f, (byte) 0xa8, 0x7d, (byte) 0xab, 0x50, (byte) 0xa3, + (byte) 0xf7, (byte) 0xcc, }; + ByteArrayInputStream in = new ByteArrayInputStream(badMagic); + + assertThrows(IOException.class, + () -> PackReverseIndexFactory.readFromFile(in, 0, () -> null)); + } + + @Test + public void read_unsupportedVersion2() { + byte[] version2 = new byte[] { 'R', 'I', 'D', 'X', // magic + 0x00, 0x00, 0x00, 0x02, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + // pack checksum + 'P', 'A', 'C', 'K', 'C', 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', + '4', '5', '6', '7', '8', '9', '0', + // checksum + 0x70, 0x17, 0x10, 0x51, (byte) 0xfe, (byte) 0xab, (byte) 0x9b, + 0x68, (byte) 0xed, 0x3a, 0x3f, 0x27, 0x1d, (byte) 0xce, + (byte) 0xff, 0x38, 0x09, (byte) 0x9b, 0x29, 0x58, }; + ByteArrayInputStream in = new ByteArrayInputStream(version2); + + assertThrows(IOException.class, + () -> PackReverseIndexFactory.readFromFile(in, 0, () -> null)); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1Test.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1Test.java new file mode 100644 index 000000000..b4849f99f --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1Test.java @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.junit.JGitTestUtil; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.transport.PackedObjectInfo; +import org.junit.Before; +import org.junit.Test; + +public class PackReverseIndexV1Test { + private static final byte[] FAKE_PACK_CHECKSUM = new byte[] { 'P', 'A', 'C', + 'K', 'C', 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6', + '7', '8', '9', '0', }; + + private static final byte[] NO_OBJECTS = new byte[] { 'R', 'I', 'D', 'X', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + // pack checksum to copy into at byte 12 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // checksum + (byte) 0xd1, 0x1d, 0x17, (byte) 0xd5, (byte) 0xa1, 0x5c, + (byte) 0x8f, 0x45, 0x7e, 0x06, (byte) 0x91, (byte) 0xf2, 0x7e, 0x20, + 0x35, 0x2c, (byte) 0xdc, 0x4c, 0x46, (byte) 0xe4, }; + + private static final byte[] SMALL_PACK_CHECKSUM = new byte[] { (byte) 0xbb, + 0x1d, 0x25, 0x3d, (byte) 0xd3, (byte) 0xf0, 0x08, 0x75, (byte) 0xc8, + 0x04, (byte) 0xd0, 0x6f, 0x73, (byte) 0xe9, 0x00, (byte) 0x82, + (byte) 0xdb, 0x09, (byte) 0xc8, 0x13, }; + + private static final byte[] SMALL_CONTENTS = new byte[] { 'R', 'I', 'D', + 'X', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + 0x00, 0x00, 0x00, 0x04, // offset 12: "68" -> index @ 4 + 0x00, 0x00, 0x00, 0x02, // offset 165: "5c" -> index @ 2 + 0x00, 0x00, 0x00, 0x03, // offset 257: "62" -> index @ 3 + 0x00, 0x00, 0x00, 0x01, // offset 450: "58" -> index @ 1 + 0x00, 0x00, 0x00, 0x05, // offset 556: "c5" -> index @ 5 + 0x00, 0x00, 0x00, 0x00, // offset 614: "2d" -> index @ 0 + // pack checksum to copy into at byte 36 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // checksum + (byte) 0xf0, 0x6d, 0x03, (byte) 0xd7, 0x6f, (byte) 0x9f, + (byte) 0xc1, 0x36, 0x26, (byte) 0xbc, (byte) 0xcb, 0x75, 0x36, + (byte) 0xa1, 0x26, 0x6a, 0x2b, (byte) 0x84, 0x16, (byte) 0x83, }; + + private PackReverseIndex emptyReverseIndex; + + /** + * Reverse index for the pack-cbdeda40019ae0e6e789088ea0f51f164f489d14.idx + * with contents `SHA-1 type size size-in-packfile offset-in-packfile` as + * shown by `verify-pack`: + * 2d04ee74dba30078c2dcdb713ddb8be4bc084d76 blob 8 17 614 + * 58728c938a9a8b9970cc09236caf94ada4689923 blob 140 106 450 + * 5ce00008cf3fb8f194f52742020bd40d78f3f1b3 commit 81 92 165 1 68cb1f232964f3cd698afc1dafe583937203c587 + * 62299a7ae290d685196e948a2fcb7d8c07f95c7d tree 198 193 257 + * 68cb1f232964f3cd698afc1dafe583937203c587 commit 220 153 12 + * c5ab27309491cf641eb11bb4b7a78641f280b482 tree 46 58 556 1 62299a7ae290d685196e948a2fcb7d8c07f95c7d + */ + private PackReverseIndex smallReverseIndex; + + private final PackedObjectInfo object614 = objectInfo( + "2d04ee74dba30078c2dcdb713ddb8be4bc084d76", OBJ_BLOB, 614); + + private final PackedObjectInfo object450 = objectInfo( + "58728c938a9a8b9970cc09236caf94ada4689923", OBJ_BLOB, 450); + + private final PackedObjectInfo object165 = objectInfo( + "5ce00008cf3fb8f194f52742020bd40d78f3f1b3", OBJ_COMMIT, 165); + + private final PackedObjectInfo object257 = objectInfo( + "62299a7ae290d685196e948a2fcb7d8c07f95c7d", OBJ_TREE, 257); + + private final PackedObjectInfo object12 = objectInfo( + "68cb1f232964f3cd698afc1dafe583937203c587", OBJ_COMMIT, 12); + + private final PackedObjectInfo object556 = objectInfo( + "c5ab27309491cf641eb11bb4b7a78641f280b482", OBJ_TREE, 556); + + // last object's offset + last object's length + private final long smallMaxOffset = 631; + + @Before + public void setUp() throws Exception { + System.arraycopy(SMALL_PACK_CHECKSUM, 0, SMALL_CONTENTS, 36, + SMALL_PACK_CHECKSUM.length); + ByteArrayInputStream smallIn = new ByteArrayInputStream(SMALL_CONTENTS); + smallReverseIndex = PackReverseIndexFactory.readFromFile(smallIn, 6, + () -> PackIndex.open(JGitTestUtil.getTestResourceFile( + "pack-cbdeda40019ae0e6e789088ea0f51f164f489d14.idx"))); + + System.arraycopy(FAKE_PACK_CHECKSUM, 0, NO_OBJECTS, 12, + FAKE_PACK_CHECKSUM.length); + ByteArrayInputStream emptyIn = new ByteArrayInputStream(NO_OBJECTS); + emptyReverseIndex = PackReverseIndexFactory.readFromFile(emptyIn, 0, + () -> null); + } + + @Test + public void read_unsupportedOidSHA256() { + byte[] version2 = new byte[] { 'R', 'I', 'D', 'X', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x02, // oid version + // pack checksum to copy into at byte 12 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // checksum + 0x6e, 0x78, 0x75, 0x67, (byte) 0x84, (byte) 0x89, (byte) 0xde, + (byte) 0xe3, (byte) 0x86, 0x6a, 0x3b, (byte) 0x98, 0x51, + (byte) 0xd8, (byte) 0x8c, (byte) 0xec, 0x50, (byte) 0xe7, + (byte) 0xfb, 0x22, }; + System.arraycopy(FAKE_PACK_CHECKSUM, 0, version2, 12, + FAKE_PACK_CHECKSUM.length); + ByteArrayInputStream in = new ByteArrayInputStream(version2); + + assertThrows(IOException.class, + () -> PackReverseIndexFactory.readFromFile(in, 0, () -> null)); + } + + @Test + public void read_objectCountTooLarge() { + ByteArrayInputStream dummyInput = new ByteArrayInputStream(NO_OBJECTS); + long biggerThanInt = ((long) Integer.MAX_VALUE) + 1; + + assertThrows(IllegalArgumentException.class, + () -> PackReverseIndexFactory.readFromFile(dummyInput, + biggerThanInt, + () -> null)); + } + + @Test + public void findObject_noObjects() { + assertNull(emptyReverseIndex.findObject(0)); + } + + @Test + public void findObject_multipleObjects() { + assertEquals(object614, smallReverseIndex.findObject(614)); + assertEquals(object450, smallReverseIndex.findObject(450)); + assertEquals(object165, smallReverseIndex.findObject(165)); + assertEquals(object257, smallReverseIndex.findObject(257)); + assertEquals(object12, smallReverseIndex.findObject(12)); + assertEquals(object556, smallReverseIndex.findObject(556)); + } + + @Test + public void findObject_badOffset() { + assertNull(smallReverseIndex.findObject(0)); + } + + @Test + public void findNextOffset_noObjects() { + assertThrows(IOException.class, + () -> emptyReverseIndex.findNextOffset(0, Long.MAX_VALUE)); + } + + @Test + public void findNextOffset_multipleObjects() throws CorruptObjectException { + assertEquals(smallMaxOffset, + smallReverseIndex.findNextOffset(614, smallMaxOffset)); + assertEquals(614, + smallReverseIndex.findNextOffset(556, smallMaxOffset)); + assertEquals(556, + smallReverseIndex.findNextOffset(450, smallMaxOffset)); + assertEquals(450, + smallReverseIndex.findNextOffset(257, smallMaxOffset)); + assertEquals(257, + smallReverseIndex.findNextOffset(165, smallMaxOffset)); + assertEquals(165, smallReverseIndex.findNextOffset(12, smallMaxOffset)); + } + + @Test + public void findNextOffset_badOffset() { + assertThrows(IOException.class, + () -> smallReverseIndex.findNextOffset(0, Long.MAX_VALUE)); + } + + @Test + public void findPosition_noObjects() { + assertEquals(-1, emptyReverseIndex.findPosition(0)); + } + + @Test + public void findPosition_multipleObjects() { + assertEquals(0, smallReverseIndex.findPosition(12)); + assertEquals(1, smallReverseIndex.findPosition(165)); + assertEquals(2, smallReverseIndex.findPosition(257)); + assertEquals(3, smallReverseIndex.findPosition(450)); + assertEquals(4, smallReverseIndex.findPosition(556)); + assertEquals(5, smallReverseIndex.findPosition(614)); + } + + @Test + public void findPosition_badOffset() { + assertEquals(-1, smallReverseIndex.findPosition(10)); + } + + @Test + public void findObjectByPosition_noObjects() { + assertThrows(AssertionError.class, + () -> emptyReverseIndex.findObjectByPosition(0)); + } + + @Test + public void findObjectByPosition_multipleObjects() { + assertEquals(object12, smallReverseIndex.findObjectByPosition(0)); + assertEquals(object165, smallReverseIndex.findObjectByPosition(1)); + assertEquals(object257, smallReverseIndex.findObjectByPosition(2)); + assertEquals(object450, smallReverseIndex.findObjectByPosition(3)); + assertEquals(object556, smallReverseIndex.findObjectByPosition(4)); + assertEquals(object614, smallReverseIndex.findObjectByPosition(5)); + } + + @Test + public void findObjectByPosition_badOffset() { + assertThrows(AssertionError.class, + () -> smallReverseIndex.findObjectByPosition(10)); + } + + private static PackedObjectInfo objectInfo(String objectId, int type, + long offset) { + PackedObjectInfo objectInfo = new PackedObjectInfo( + ObjectId.fromString(objectId)); + objectInfo.setType(type); + objectInfo.setOffset(offset); + return objectInfo; + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1WriteReadTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1WriteReadTest.java new file mode 100644 index 000000000..372a4c7cb --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1WriteReadTest.java @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH; +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.transport.PackedObjectInfo; +import org.junit.Test; + +public class PackReverseIndexV1WriteReadTest { + + private static byte[] PACK_CHECKSUM = new byte[] { 'P', 'A', 'C', 'K', 'C', + 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6', '7', '8', + '9', '0', }; + + @Test + public void writeThenRead_noObjects() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out, + 1); + List objectsSortedByName = new ArrayList<>(); + + // write + writer.write(objectsSortedByName, PACK_CHECKSUM); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + + // read + PackReverseIndex noObjectsReverseIndex = PackReverseIndexFactory + .readFromFile(in, 0, () -> null); + + // use + assertThrows(AssertionError.class, + () -> noObjectsReverseIndex.findObjectByPosition(0)); + } + + @Test + public void writeThenRead_oneObject() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out, + 1); + PackedObjectInfo a = objectInfo("a", OBJ_COMMIT, 0); + List objectsSortedByName = List.of(a); + + // write + writer.write(objectsSortedByName, PACK_CHECKSUM); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackIndex mockForwardIndex = mock(PackIndex.class); + when(mockForwardIndex.getObjectId(0)).thenReturn(a); + + // read + PackReverseIndex oneObjectReverseIndex = PackReverseIndexFactory + .readFromFile(in, 1, () -> mockForwardIndex); + + // use + assertEquals(a, oneObjectReverseIndex.findObjectByPosition(0)); + } + + @Test + public void writeThenRead_multipleObjectsLargeOffsets() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out, + 1); + PackedObjectInfo a = objectInfo("a", OBJ_BLOB, 200000000); + PackedObjectInfo b = objectInfo("b", OBJ_COMMIT, 0); + PackedObjectInfo c = objectInfo("c", OBJ_COMMIT, 52000000000L); + PackedObjectInfo d = objectInfo("d", OBJ_TREE, 7); + PackedObjectInfo e = objectInfo("e", OBJ_COMMIT, 38000000000L); + List objectsSortedByName = List.of(a, b, c, d, e); + + writer.write(objectsSortedByName, PACK_CHECKSUM); + + // write + writer.write(objectsSortedByName, PACK_CHECKSUM); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackIndex mockForwardIndex = mock(PackIndex.class); + when(mockForwardIndex.getObjectId(4)).thenReturn(e); + + // read + PackReverseIndex multipleObjectsReverseIndex = PackReverseIndexFactory + .readFromFile(in, 5, () -> mockForwardIndex); + + // use with minimal mocked forward index use + assertEquals(e, multipleObjectsReverseIndex.findObjectByPosition(3)); + } + + private static PackedObjectInfo objectInfo(String objectId, int type, + long offset) { + assert (objectId.length() == 1); + PackedObjectInfo objectInfo = new PackedObjectInfo( + ObjectId.fromString(objectId.repeat(OBJECT_ID_STRING_LENGTH))); + objectInfo.setType(type); + objectInfo.setOffset(offset); + return objectInfo; + } +} diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index bc8144f1c..bf8a1ef5d 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -689,6 +689,7 @@ searchForReachableBranches=Finding reachable branches saveFileStoreAttributesFailed=Saving measured FileStore attributes to user config failed searchForReuse=Finding sources searchForReuseTimeout=Search for reuse timed out after {0} seconds +unsupportedObjectIdVersion=Object id version {0} is not supported searchForSizes=Getting sizes secondsAgo={0} seconds ago selectingCommits=Selecting commits diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index 518e0b7d9..a032d24c7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -866,6 +866,7 @@ public static JGitText get() { /***/ public String unsupportedEncryptionVersion; /***/ public String unsupportedGC; /***/ public String unsupportedMark; + /***/ public String unsupportedObjectIdVersion; /***/ public String unsupportedOperationNotAddAtEnd; /***/ public String unsupportedPackIndexVersion; /***/ public String unsupportedPackReverseIndexVersion; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java index c4290c371..74ea89025 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java @@ -24,6 +24,16 @@ * @see Pack */ public interface PackReverseIndex { + /** + * Magic bytes that uniquely identify git reverse index files. + */ + byte[] MAGIC = { 'R', 'I', 'D', 'X' }; + + /** + * The first reverse index file version. + */ + int VERSION_1 = 1; + /** * Search for object id with the specified start offset in this pack * (reverse) index. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexComputed.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexComputed.java index 33bdb0974..d6eaa965d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexComputed.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexComputed.java @@ -30,25 +30,30 @@ final class PackReverseIndexComputed implements PackReverseIndex { private final PackIndex index; /** - * The number of bytes per entry in the offsetIndex. + * The difference in offset between the start of an offset bucket and the + * start of its succeeding bucket. */ private final long bucketSize; /** - * An index into the nth mapping, where the value is the position after the - * the last index that contains the values of the bucket. For example given - * offset o (and bucket = o / bucketSize), the offset will be contained in - * the range nth[offsetIndex[bucket - 1]] inclusive to - * nth[offsetIndex[bucket]] exclusive. + * The indexes into indexPosInOffsetOrder at which the next bucket starts. + *

+ * For example, given offset o (and therefore bucket = o / bucketSize), the + * indexPos corresponding to o will be contained in the range + * indexPosInOffsetOrder[nextBucketStart[bucket - 1]] inclusive to + * indexPosInOffsetOrder[nextBucketStart[bucket]] exclusive. + *

+ * This range information can speed up #binarySearch by identifying the + * relevant bucket and only searching within its range. *

* See {@link #binarySearch} */ - private final int[] offsetIndex; + private final int[] nextBucketStart; /** * Mapping from indices in offset order to indices in SHA-1 order. */ - private final int[] nth; + private final int[] indexPosInOffsetOrder; /** * Create reverse index from straight/forward pack index, by indexing all @@ -60,62 +65,81 @@ final class PackReverseIndexComputed implements PackReverseIndex { PackReverseIndexComputed(PackIndex packIndex) { index = packIndex; - final long cnt = index.getObjectCount(); - if (cnt + 1 > Integer.MAX_VALUE) { + long rawCnt = index.getObjectCount(); + if (rawCnt + 1 > Integer.MAX_VALUE) { throw new IllegalArgumentException( JGitText.get().hugeIndexesAreNotSupportedByJgitYet); } + int cnt = (int) rawCnt; if (cnt == 0) { bucketSize = Long.MAX_VALUE; - offsetIndex = new int[1]; - nth = new int[0]; + nextBucketStart = new int[1]; + indexPosInOffsetOrder = new int[0]; return; } - final long[] offsetsBySha1 = new long[(int) cnt]; - + // Sort the index positions according to the corresponding pack offsets. + // Use bucket sort since the offsets are somewhat uniformly distributed + // over the range (0, pack size). + long[] offsetsInIndexOrder = new long[cnt]; long maxOffset = 0; - int ith = 0; - for (MutableEntry me : index) { - final long o = me.getOffset(); - offsetsBySha1[ith++] = o; - if (o > maxOffset) { - maxOffset = o; + int i = 0; + for (MutableEntry entry : index) { + long offset = entry.getOffset(); + offsetsInIndexOrder[i++] = offset; + if (offset > maxOffset) { + maxOffset = offset; } } bucketSize = maxOffset / cnt + 1; - int[] bucketIndex = new int[(int) cnt]; - int[] bucketValues = new int[(int) cnt + 1]; - for (int oi = 0; oi < offsetsBySha1.length; oi++) { - final long o = offsetsBySha1[oi]; - final int bucket = (int) (o / bucketSize); - final int bucketValuesPos = oi + 1; - final int current = bucketIndex[bucket]; - bucketIndex[bucket] = bucketValuesPos; - bucketValues[bucketValuesPos] = current; + // The values in each bucket, stored as a linked list. Given a bucket, + // headValues[bucket] contains the first value, + // furtherValues[headValues[bucket]] contains the second, + // furtherValues[furtherValues[headValues[bucket]]] the third, and so + // on. The linked list stops when a value is 0. The values themselves + // are shifted index positions. There won't be any + // collisions because every index position is unique. + int[] headValues = new int[cnt]; + int[] furtherValues = new int[cnt + 1]; + for (int indexPos = 0; indexPos < cnt; indexPos++) { + // The offset determines which bucket this index position falls + // into, since the goal is sort into offset order. + long offset = offsetsInIndexOrder[indexPos]; + int bucket = (int) (offset / bucketSize); + // Store the index positions as 1-indexed so that default + // initialized value 0 can be interpreted as the end of the bucket + // values. + int asBucketValue = indexPos + 1; + // If there is an existing value in this bucket, push the value to + // the front of the linked list. + int current = headValues[bucket]; + headValues[bucket] = asBucketValue; + furtherValues[asBucketValue] = current; } int nthByOffset = 0; - nth = new int[offsetsBySha1.length]; - offsetIndex = bucketIndex; // Reuse the allocation - for (int bi = 0; bi < bucketIndex.length; bi++) { - final int start = nthByOffset; + indexPosInOffsetOrder = new int[cnt]; + nextBucketStart = headValues; // Reuse the allocation + for (int bi = 0; bi < headValues.length; bi++) { // Insertion sort of the values in the bucket. - for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) { - final int nthBySha1 = vi - 1; - final long o = offsetsBySha1[nthBySha1]; + int start = nthByOffset; + for (int vi = headValues[bi]; vi > 0; vi = furtherValues[vi]) { + int nthBySha1 = vi - 1; + long o = offsetsInIndexOrder[nthBySha1]; int insertion = nthByOffset++; for (; start < insertion; insertion--) { - if (o > offsetsBySha1[nth[insertion - 1]]) { + if (o > offsetsInIndexOrder[indexPosInOffsetOrder[insertion + - 1]]) { break; } - nth[insertion] = nth[insertion - 1]; + indexPosInOffsetOrder[insertion] = indexPosInOffsetOrder[insertion + - 1]; } - nth[insertion] = nthBySha1; + indexPosInOffsetOrder[insertion] = nthBySha1; } - offsetIndex[bi] = nthByOffset; + nextBucketStart[bi] = nthByOffset; } } @@ -125,7 +149,7 @@ public ObjectId findObject(long offset) { if (ith < 0) { return null; } - return index.getObjectId(nth[ith]); + return index.getObjectId(indexPosInOffsetOrder[ith]); } @Override @@ -138,10 +162,10 @@ public long findNextOffset(long offset, long maxOffset) Long.valueOf(offset))); } - if (ith + 1 == nth.length) { + if (ith + 1 == indexPosInOffsetOrder.length) { return maxOffset; } - return index.getOffset(nth[ith + 1]); + return index.getOffset(indexPosInOffsetOrder[ith + 1]); } @Override @@ -151,11 +175,11 @@ public int findPosition(long offset) { private int binarySearch(long offset) { int bucket = (int) (offset / bucketSize); - int low = bucket == 0 ? 0 : offsetIndex[bucket - 1]; - int high = offsetIndex[bucket]; + int low = bucket == 0 ? 0 : nextBucketStart[bucket - 1]; + int high = nextBucketStart[bucket]; while (low < high) { final int mid = (low + high) >>> 1; - final long o = index.getOffset(nth[mid]); + final long o = index.getOffset(indexPosInOffsetOrder[mid]); if (offset < o) { high = mid; } else if (offset == o) { @@ -169,6 +193,6 @@ private int binarySearch(long offset) { @Override public ObjectId findObjectByPosition(int nthPosition) { - return index.getObjectId(nth[nthPosition]); + return index.getObjectId(indexPosInOffsetOrder[nthPosition]); } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexFactory.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexFactory.java index 2bf2f79cc..b16da5ae8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexFactory.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexFactory.java @@ -10,6 +10,20 @@ package org.eclipse.jgit.internal.storage.file; +import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.MAGIC; +import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.VERSION_1; + +import java.io.DataInput; +import java.io.IOException; +import java.io.InputStream; +import java.security.DigestInputStream; +import java.text.MessageFormat; +import java.util.Arrays; + +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.util.IO; + /** * Factory for creating instances of {@link PackReverseIndex}. */ @@ -26,4 +40,45 @@ public final class PackReverseIndexFactory { public static PackReverseIndex computeFromIndex(PackIndex packIndex) { return new PackReverseIndexComputed(packIndex); } + + /** + * Read an in-memory pack reverse index from the given input stream. This + * has a linear runtime. + * + * @param src + * the input stream to read the contents from + * @param objectCount + * the number of objects in the corresponding pack + * @param packIndexSupplier + * a function to lazily get the corresponding forward index + * @return the reverse index instance + * @throws IOException + * if reading from the input stream fails + */ + static PackReverseIndex readFromFile(InputStream src, long objectCount, + PackBitmapIndex.SupplierWithIOException packIndexSupplier) + throws IOException { + final DigestInputStream digestIn = new DigestInputStream(src, + Constants.newMessageDigest()); + + final byte[] magic = new byte[MAGIC.length]; + IO.readFully(digestIn, magic); + if (!Arrays.equals(magic, MAGIC)) { + throw new IOException( + MessageFormat.format(JGitText.get().expectedGot, + Arrays.toString(MAGIC), Arrays.toString(magic))); + } + + DataInput dataIn = new SimpleDataInput(digestIn); + int version = dataIn.readInt(); + switch (version) { + case VERSION_1: + return new PackReverseIndexV1(digestIn, objectCount, + packIndexSupplier); + default: + throw new IOException(MessageFormat.format( + JGitText.get().unsupportedPackReverseIndexVersion, + String.valueOf(version))); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1.java new file mode 100644 index 000000000..76f0793cc --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexV1.java @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; + +import java.io.DataInput; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.security.DigestInputStream; +import java.text.MessageFormat; + +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.util.IO; + +/** + * Reverse index for forward pack index which is parsed from a version 1 file. + *

+ * The file format is specified at + * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format. + */ +final class PackReverseIndexV1 implements PackReverseIndex { + static final int OID_VERSION_SHA1 = 1; + + static final int OID_VERSION_SHA256 = 2; + + private static final int SHA1_BYTES = OBJECT_ID_LENGTH; + + private final DigestInputStream inputStream; + + private final DataInput dataIn; + + /** + * A lazy supplier for the corresponding PackIndex. The PackIndex is not + * needed during instantiation and parsing, only later when querying the + * reverse index. Allow lazy loading so that the parsing of the forward and + * reverse indices could happen in parallel. + */ + private final PackBitmapIndex.SupplierWithIOException packIndexSupplier; + + private int objectCount; + + private byte[] packChecksum; + + private int[] indexPositionsSortedByOffset; + + private PackIndex packIndex; + + PackReverseIndexV1(DigestInputStream inputStream, long objectCount, + PackBitmapIndex.SupplierWithIOException packIndexSupplier) + throws IOException { + try { + this.objectCount = Math.toIntExact(objectCount); + } catch (ArithmeticException e) { + throw new IllegalArgumentException( + JGitText.get().hugeIndexesAreNotSupportedByJgitYet); + } + + this.inputStream = inputStream; + dataIn = new SimpleDataInput(inputStream); + + int oid_version = dataIn.readInt(); + switch (oid_version) { + case OID_VERSION_SHA1: + // JGit Pack only supports AnyObjectId, which represents SHA1. + break; + case OID_VERSION_SHA256: + throw new IOException(MessageFormat.format( + JGitText.get().unsupportedObjectIdVersion, "SHA256")); //$NON-NLS-1$ + default: + throw new IOException(MessageFormat.format( + JGitText.get().unsupportedObjectIdVersion, + String.valueOf(oid_version))); + } + + indexPositionsSortedByOffset = new int[this.objectCount]; + this.packIndexSupplier = packIndexSupplier; + + parseBody(); + parseChecksums(); + } + + private void parseBody() throws IOException { + for (int i = 0; i < objectCount; i++) { + indexPositionsSortedByOffset[i] = dataIn.readInt(); + } + } + + private void parseChecksums() throws IOException { + packChecksum = new byte[SHA1_BYTES]; + IO.readFully(inputStream, packChecksum); + // TODO: verify checksum + + byte[] readSelfChecksum = new byte[SHA1_BYTES]; + IO.readFully(inputStream, readSelfChecksum); + } + + @Override + public ObjectId findObject(long offset) { + int reversePosition = findPosition(offset); + if (reversePosition < 0) { + return null; + } + int forwardPosition = findForwardPositionByReversePosition( + reversePosition); + return getPackIndex().getObjectId(forwardPosition); + } + + @Override + public long findNextOffset(long offset, long maxOffset) + throws CorruptObjectException { + int position = findPosition(offset); + if (position < 0) { + throw new CorruptObjectException(MessageFormat.format(JGitText + .get().cantFindObjectInReversePackIndexForTheSpecifiedOffset, + Long.valueOf(offset))); + } + if (position + 1 == objectCount) { + return maxOffset; + } + return findOffsetByReversePosition(position + 1); + } + + @Override + public int findPosition(long offset) { + return binarySearchByOffset(offset); + } + + @Override + public ObjectId findObjectByPosition(int position) { + return getPackIndex() + .getObjectId(findForwardPositionByReversePosition(position)); + } + + private long findOffsetByReversePosition(int position) { + return getPackIndex() + .getOffset(findForwardPositionByReversePosition(position)); + } + + private int findForwardPositionByReversePosition(int reversePosition) { + assert (reversePosition >= 0); + assert (reversePosition < indexPositionsSortedByOffset.length); + return indexPositionsSortedByOffset[reversePosition]; + } + + private int binarySearchByOffset(long wantedOffset) { + int low = 0; + int high = objectCount - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + long offsetAtMid = findOffsetByReversePosition(mid); + if (offsetAtMid == wantedOffset) { + return mid; + } else if (offsetAtMid > wantedOffset) { + high = mid - 1; + } else { + low = mid + 1; + } + } + return -1; + } + + private PackIndex getPackIndex() { + if (packIndex == null) { + try { + packIndex = packIndexSupplier.get(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return packIndex; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java index 4c8417b11..94a4e9c12 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java @@ -9,6 +9,8 @@ */ package org.eclipse.jgit.internal.storage.file; +import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.VERSION_1; + import java.io.BufferedOutputStream; import java.io.DataOutput; import java.io.IOException; @@ -28,16 +30,6 @@ * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format. */ public abstract class PackReverseIndexWriter { - /** - * Magic bytes that uniquely identify git reverse index files. - */ - protected static byte[] MAGIC = { 'R', 'I', 'D', 'X' }; - - /** - * The first reverse index file version. - */ - protected static final int VERSION_1 = 1; - /** * Stream to write contents to while maintaining a checksum. */ diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java index 7630724d0..ff1809f44 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java @@ -9,6 +9,10 @@ */ package org.eclipse.jgit.internal.storage.file; +import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.MAGIC; +import static org.eclipse.jgit.internal.storage.file.PackReverseIndex.VERSION_1; +import static org.eclipse.jgit.internal.storage.file.PackReverseIndexV1.OID_VERSION_SHA1; + import java.io.IOException; import java.io.OutputStream; import java.util.List; @@ -24,8 +28,6 @@ * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format. */ final class PackReverseIndexWriterV1 extends PackReverseIndexWriter { - private static final int OID_VERSION_SHA1 = 1; - private static final int DEFAULT_OID_VERSION = OID_VERSION_SHA1; PackReverseIndexWriterV1(final OutputStream dst) {