From 64615b44e6f4a7f14a43ba19d4dd28f8483ed2eb Mon Sep 17 00:00:00 2001 From: Anna Papitto Date: Thu, 27 Apr 2023 11:01:30 -0700 Subject: [PATCH] PackReverseIndexWriter: write out version 1 reverse index file The reverse index for a pack is used to quickly find an object's position in the pack's forward index based on that object's pack offset. It is currently computed from the forward index by sorting the index entries by the corresponding pack offset. This computation uses bucket sort with insertion sort, which has an average runtime of O(n log n) and worst case runtime of O(n^2); and memory usage of 3*size(int)*n because it maintains 3 int arrays, even after sorting is completed. The computation must be performed every time that the reverse index object is created in memory. In contrast, Cgit persists a pack reverse index file to avoid recomputing the reverse index ordering every time that it is needed. Instead they write a file with format https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format which can later be read and parsed into an in-memory reverse index each time it is needed. Introduce these reverse index files to JGit. PackReverseIndexWriter writes out a reverse index file to be read later when needed. Subclass PackReverseIndexWriterV1 writes a file with the official version 1 format. To avoid temporarily allocating an Integer collection while sorting and writing out the contents, using memory 4*size(Integer)*n, use an IntList and its #sort method, which uses quicksort. Change-Id: I6437745777a16f723e2f1cfcce4e0d94e599dcee Signed-off-by: Anna Papitto --- .../file/PackReverseIndexWriterTest.java | 43 +++++ .../file/PackReverseIndexWriterV1Test.java | 122 ++++++++++++++ .../eclipse/jgit/internal/JGitText.properties | 1 + .../org/eclipse/jgit/internal/JGitText.java | 1 + .../storage/file/PackReverseIndexWriter.java | 149 ++++++++++++++++++ .../file/PackReverseIndexWriterV1.java | 75 +++++++++ 6 files changed, 391 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java new file mode 100644 index 000000000..220976471 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2023, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; + +import org.junit.Test; + +public class PackReverseIndexWriterTest { + + @Test + public void createWriter_defaultVersion() { + PackReverseIndexWriter version1 = PackReverseIndexWriter + .createWriter(new ByteArrayOutputStream()); + + assertTrue(version1 instanceof PackReverseIndexWriterV1); + } + + @Test + public void createWriter_version1() { + PackReverseIndexWriter version1 = PackReverseIndexWriter + .createWriter(new ByteArrayOutputStream(), 1); + + assertTrue(version1 instanceof PackReverseIndexWriterV1); + } + + @Test + public void createWriter_unsupportedVersion() { + assertThrows(IllegalArgumentException.class, + () -> PackReverseIndexWriter + .createWriter(new ByteArrayOutputStream(), 2)); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java new file mode 100644 index 000000000..0124887af --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2023, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH; +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; +import static org.junit.Assert.assertArrayEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.transport.PackedObjectInfo; +import org.junit.Test; + +public class PackReverseIndexWriterV1Test { + + private static byte[] PACK_CHECKSUM = new byte[] { 'P', 'A', 'C', 'K', 'C', + 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6', '7', '8', + '9', '0', }; + + @Test + public void write_noObjects() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out, + 1); + List objectsSortedByName = new ArrayList<>(); + + writer.write(objectsSortedByName, PACK_CHECKSUM); + + byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + // pack checksum to copy into at byte 12 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // checksum + (byte) 0xd1, 0x1d, 0x17, (byte) 0xd5, (byte) 0xa1, 0x5c, + (byte) 0x8f, 0x45, 0x7e, 0x06, (byte) 0x91, (byte) 0xf2, 0x7e, + 0x20, 0x35, 0x2c, (byte) 0xdc, 0x4c, 0x46, (byte) 0xe4, }; + System.arraycopy(PACK_CHECKSUM, 0, expected, 12, PACK_CHECKSUM.length); + assertArrayEquals(expected, out.toByteArray()); + } + + @Test + public void write_oneObject() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out, + 1); + PackedObjectInfo a = objectInfo("a", OBJ_COMMIT, 0); + List objectsSortedByName = List.of(a); + + writer.write(objectsSortedByName, PACK_CHECKSUM); + + byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + 0x00, 0x00, 0x00, 0x00, // 0: "a" -> 0 + // pack checksum to copy into at byte 16 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // checksum + 0x48, 0x04, 0x29, 0x69, 0x2f, (byte) 0xf3, (byte) 0x80, + (byte) 0xa1, (byte) 0xf5, (byte) 0x92, (byte) 0xc2, 0x21, 0x46, + (byte) 0xd5, 0x08, 0x44, (byte) 0xa4, (byte) 0xf4, (byte) 0xc0, + (byte) 0xec, }; + System.arraycopy(PACK_CHECKSUM, 0, expected, 16, PACK_CHECKSUM.length); + assertArrayEquals(expected, out.toByteArray()); + } + + @Test + public void write_multipleObjects() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out, + 1); + PackedObjectInfo a = objectInfo("a", OBJ_BLOB, 20); + PackedObjectInfo b = objectInfo("b", OBJ_COMMIT, 0); + PackedObjectInfo c = objectInfo("c", OBJ_COMMIT, 52); + PackedObjectInfo d = objectInfo("d", OBJ_TREE, 7); + PackedObjectInfo e = objectInfo("e", OBJ_COMMIT, 38); + List objectsSortedByName = List.of(a, b, c, d, e); + + writer.write(objectsSortedByName, PACK_CHECKSUM); + + byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic + 0x00, 0x00, 0x00, 0x01, // file version + 0x00, 0x00, 0x00, 0x01, // oid version + 0x00, 0x00, 0x00, 0x01, // offset 0: "b" -> index @ 1 + 0x00, 0x00, 0x00, 0x03, // offset 7: "d" -> index @ 3 + 0x00, 0x00, 0x00, 0x00, // offset 20: "a" -> index @ 0 + 0x00, 0x00, 0x00, 0x04, // offset 38: "e" -> index @ 4 + 0x00, 0x00, 0x00, 0x02, // offset 52: "c" -> index @ 2 + // pack checksum to copy into at byte 32 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // checksum + (byte) 0xe3, 0x1c, (byte) 0xd3, (byte) 0xeb, (byte) 0x83, 0x47, + (byte) 0x80, (byte) 0xb1, (byte) 0xe0, 0x3c, 0x2b, 0x25, + (byte) 0xd6, 0x3e, (byte) 0xdc, (byte) 0xde, (byte) 0xbe, 0x4b, + 0x0a, (byte) 0xe2, }; + System.arraycopy(PACK_CHECKSUM, 0, expected, 32, PACK_CHECKSUM.length); + assertArrayEquals(expected, out.toByteArray()); + } + + private static PackedObjectInfo objectInfo(String objectId, int type, + long offset) { + assert (objectId.length() == 1); + PackedObjectInfo objectInfo = new PackedObjectInfo( + ObjectId.fromString(objectId.repeat(OBJECT_ID_STRING_LENGTH))); + objectInfo.setType(type); + objectInfo.setOffset(offset); + return objectInfo; + } +} diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index 78dd9bd4c..bc8144f1c 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -839,6 +839,7 @@ unsupportedGC=Unsupported garbage collector for repository type: {0} unsupportedMark=Mark not supported unsupportedOperationNotAddAtEnd=Not add-at-end: {0} unsupportedPackIndexVersion=Unsupported pack index version {0} +unsupportedPackReverseIndexVersion=Unsupported pack reverse index version {0} unsupportedPackVersion=Unsupported pack version {0}. unsupportedReftableVersion=Unsupported reftable version {0}. unsupportedRepositoryDescription=Repository description not supported diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index 28634cbc4..518e0b7d9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -868,6 +868,7 @@ public static JGitText get() { /***/ public String unsupportedMark; /***/ public String unsupportedOperationNotAddAtEnd; /***/ public String unsupportedPackIndexVersion; + /***/ public String unsupportedPackReverseIndexVersion; /***/ public String unsupportedPackVersion; /***/ public String unsupportedReftableVersion; /***/ public String unsupportedRepositoryDescription; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java new file mode 100644 index 000000000..4c8417b11 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2023, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import java.io.BufferedOutputStream; +import java.io.DataOutput; +import java.io.IOException; +import java.io.OutputStream; +import java.security.DigestOutputStream; +import java.text.MessageFormat; +import java.util.List; + +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.transport.PackedObjectInfo; + +/** + * Writes reverse index files conforming to the requested version. + *

+ * The reverse index file format is specified at + * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format. + */ +public abstract class PackReverseIndexWriter { + /** + * Magic bytes that uniquely identify git reverse index files. + */ + protected static byte[] MAGIC = { 'R', 'I', 'D', 'X' }; + + /** + * The first reverse index file version. + */ + protected static final int VERSION_1 = 1; + + /** + * Stream to write contents to while maintaining a checksum. + */ + protected final DigestOutputStream out; + + /** + * Stream to write primitive type contents to while maintaining a checksum. + */ + protected final DataOutput dataOutput; + + private static final int DEFAULT_VERSION = VERSION_1; + + /** + * Construct the components of a PackReverseIndexWriter that are shared + * between subclasses. + * + * @param dst + * the OutputStream that the instance will write contents to + */ + protected PackReverseIndexWriter(OutputStream dst) { + out = new DigestOutputStream( + dst instanceof BufferedOutputStream ? dst + : new BufferedOutputStream(dst), + Constants.newMessageDigest()); + dataOutput = new SimpleDataOutput(out); + } + + /** + * Create a writer instance for the default file format version. + * + * @param dst + * the OutputStream that contents will be written to + * @return the new writer instance + */ + public static PackReverseIndexWriter createWriter(OutputStream dst) { + return createWriter(dst, DEFAULT_VERSION); + } + + /** + * Create a writer instance for the specified file format version. + * + * @param dst + * the OutputStream that contents will be written to + * @param version + * the reverse index format version to write contents as + * @return the new writer instance + */ + public static PackReverseIndexWriter createWriter(OutputStream dst, + int version) { + if (version == VERSION_1) { + return new PackReverseIndexWriterV1(dst); + } + throw new IllegalArgumentException(MessageFormat.format( + JGitText.get().unsupportedPackReverseIndexVersion, + Integer.toString(version))); + } + + /** + * Write the contents of a reverse index file for the given objects. + * + * @param objectsByIndexPos + * the objects whose forward index file positions should be + * written, sorted by forward index file position (currently SHA1 + * ordering) + * @param packChecksum + * the checksum of the corresponding pack file + * @throws IOException + * if writing the output fails + */ + public void write( + List objectsByIndexPos, + byte[] packChecksum) throws IOException { + writeHeader(); + writeBody(objectsByIndexPos); + writeFooter(packChecksum); + out.flush(); + } + + /** + * Write the header of a reverse index file, usually the magic bytes and the + * file format version. + * + * @throws IOException + * if writing the output fails + */ + protected abstract void writeHeader() throws IOException; + + /** + * Write the body of a reverse index file, usually the forward index + * positions of the given objects, sorted by those objects' pack file + * offsets. + * + * @param objectsSortedByIndexPosition + * the objects whose forward index file positions should be + * written, sorted by forward index file position; not modified + * during method + * @throws IOException + * if writing the output fails + */ + protected abstract void writeBody( + List objectsSortedByIndexPosition) + throws IOException; + + private void writeFooter(byte[] packChecksum) throws IOException { + out.write(packChecksum); + byte[] selfChecksum = out.getMessageDigest().digest(); + out.write(selfChecksum); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java new file mode 100644 index 000000000..7630724d0 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2023, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; + +import org.eclipse.jgit.transport.PackedObjectInfo; +import org.eclipse.jgit.util.IntList; +import org.eclipse.jgit.util.IntList.IntComparator; + +/** + * Writes reverse index files following the version 1 format. + *

+ * The file format is specified at + * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format. + */ +final class PackReverseIndexWriterV1 extends PackReverseIndexWriter { + private static final int OID_VERSION_SHA1 = 1; + + private static final int DEFAULT_OID_VERSION = OID_VERSION_SHA1; + + PackReverseIndexWriterV1(final OutputStream dst) { + super(dst); + } + + @Override + protected void writeHeader() throws IOException { + out.write(MAGIC); + dataOutput.writeInt(VERSION_1); + dataOutput.writeInt(DEFAULT_OID_VERSION); + } + + @Override + protected void writeBody(List objectsByIndexPos) + throws IOException { + IntList positionsByOffset = IntList.filledWithRange(0, + objectsByIndexPos.size()); + positionsByOffset + .sort(new IndexPositionsByOffsetComparator(objectsByIndexPos)); + + for (int i = 0; i < positionsByOffset.size(); i++) { + int indexPosition = positionsByOffset.get(i); + dataOutput.writeInt(indexPosition); + } + } + + private static class IndexPositionsByOffsetComparator + implements IntComparator { + private List objectsByIndexPos; + + private IndexPositionsByOffsetComparator( + List objectsByIndexPos) { + this.objectsByIndexPos = objectsByIndexPos; + } + + @Override + public int compare(int firstIndexPosition, int secondIndexPosition) { + return Long.compare(getOffset(firstIndexPosition), + getOffset(secondIndexPosition)); + } + + private long getOffset(int indexPosition) { + return objectsByIndexPos.get(indexPosition).getOffset(); + } + } +}