diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexV1Test.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexV1Test.java new file mode 100644 index 000000000..11e374602 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexV1Test.java @@ -0,0 +1,392 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.transport.PackedObjectInfo; +import org.eclipse.jgit.util.BlockList; +import org.junit.Test; + +public class PackObjectSizeIndexV1Test { + private static final ObjectId OBJ_ID = ObjectId + .fromString("b8b1d53172fb3fb19647adce4b38fab4371c2454"); + + private static final long GB = 1 << 30; + + private static final int MAX_24BITS_UINT = 0xffffff; + + @Test + public void write_24bPositions_32bSizes() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + objs.add(blobWithSize(100)); + objs.add(blobWithSize(400)); + objs.add(blobWithSize(200)); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x00, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x03, // obj count + 0x18, // Unsigned 3 bytes + 0x00, 0x00, 0x00, 0x03, // 3 positions + 0x00, 0x00, 0x00, // positions + 0x00, 0x00, 0x01, // + 0x00, 0x00, 0x02, // + 0x00, // No more positions + 0x00, 0x00, 0x00, 0x64, // size 100 + 0x00, 0x00, 0x01, (byte) 0x90, // size 400 + 0x00, 0x00, 0x00, (byte) 0xc8, // size 200 + 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void write_32bPositions_32bSizes() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new BlockList<>(9_000_000); + // The 24 bit range is full of commits and trees + PackedObjectInfo commit = objInfo(Constants.OBJ_COMMIT, 100); + for (int i = 0; i <= MAX_24BITS_UINT; i++) { + objs.add(commit); + } + objs.add(blobWithSize(100)); + objs.add(blobWithSize(400)); + objs.add(blobWithSize(200)); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x00, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x03, // obj count + (byte) 0x20, // Signed 4 bytes + 0x00, 0x00, 0x00, 0x03, // 3 positions + 0x01, 0x00, 0x00, 0x00, // positions + 0x01, 0x00, 0x00, 0x01, // + 0x01, 0x00, 0x00, 0x02, // + 0x00, // No more positions + 0x00, 0x00, 0x00, 0x64, // size 100 + 0x00, 0x00, 0x01, (byte) 0x90, // size 400 + 0x00, 0x00, 0x00, (byte) 0xc8, // size 200 + 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void write_24b32bPositions_32bSizes() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new BlockList<>(9_000_000); + // The 24 bit range is full of commits and trees + PackedObjectInfo commit = objInfo(Constants.OBJ_COMMIT, 100); + for (int i = 0; i < MAX_24BITS_UINT; i++) { + objs.add(commit); + } + objs.add(blobWithSize(100)); + objs.add(blobWithSize(400)); + objs.add(blobWithSize(200)); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x00, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x03, // obj count + 0x18, // 3 bytes + 0x00, 0x00, 0x00, 0x01, // 1 position + (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0x20, // 4 bytes (32 bits) + 0x00, 0x00, 0x00, 0x02, // 2 positions + 0x01, 0x00, 0x00, 0x00, // positions + 0x01, 0x00, 0x00, 0x01, // + 0x00, // No more positions + 0x00, 0x00, 0x00, 0x64, // size 100 + 0x00, 0x00, 0x01, (byte) 0x90, // size 400 + 0x00, 0x00, 0x00, (byte) 0xc8, // size 200 + 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void write_64bitsSize() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + objs.add(blobWithSize(100)); + objs.add(blobWithSize(3 * GB)); + objs.add(blobWithSize(4 * GB)); + objs.add(blobWithSize(400)); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x00, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x04, // Object count + 0x18, // Unsigned 3 byte positions + 0x00, 0x00, 0x00, 0x04, // 4 positions + 0x00, 0x00, 0x00, // positions + 0x00, 0x00, 0x01, // + 0x00, 0x00, 0x02, // + 0x00, 0x00, 0x03, // + 0x00, // No more positions + 0x00, 0x00, 0x00, 0x64, // size 100 + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, // -1 (3GB) + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xfe, // -2 (4GB) + 0x00, 0x00, 0x01, (byte) 0x90, // size 400 + 0x00, 0x00, 0x00, (byte) 0x02, // 64bit sizes counter (2) + 0x00, 0x00, 0x00, 0x00, // size 3Gb + (byte) 0xc0, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x01, // size 4GB + (byte) 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00 // 128bit sizes counter + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void write_allObjectsTooSmall() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 1 << 10); + List objs = new ArrayList<>(); + // too small blobs + objs.add(blobWithSize(100)); + objs.add(blobWithSize(200)); + objs.add(blobWithSize(400)); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x04, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x00, // Object count + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void write_onlyBlobsIndexed() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + objs.add(objInfo(Constants.OBJ_COMMIT, 1000)); + objs.add(blobWithSize(100)); + objs.add(objInfo(Constants.OBJ_TAG, 1000)); + objs.add(blobWithSize(400)); + objs.add(blobWithSize(200)); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x00, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x03, // Object count + 0x18, // Positions in 3 bytes + 0x00, 0x00, 0x00, 0x03, // 3 entries + 0x00, 0x00, 0x01, // positions + 0x00, 0x00, 0x03, // + 0x00, 0x00, 0x04, // + 0x00, // No more positions + 0x00, 0x00, 0x00, 0x64, // size 100 + 0x00, 0x00, 0x01, (byte) 0x90, // size 400 + 0x00, 0x00, 0x00, (byte) 0xc8, // size 200 + 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void write_noObjects() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + + writer.write(objs); + byte[] expected = new byte[] { -1, 's', 'i', 'z', // header + 0x01, // version + 0x00, 0x00, 0x00, 0x00, // minimum object size + 0x00, 0x00, 0x00, 0x00, // Object count + }; + byte[] output = out.toByteArray(); + assertArrayEquals(expected, output); + } + + @Test + public void read_empty() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + + writer.write(objs); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in); + assertEquals(-1, index.getSize(0)); + assertEquals(-1, index.getSize(1)); + assertEquals(-1, index.getSize(1 << 30)); + assertEquals(0, index.getThreshold()); + } + + @Test + public void read_only24bitsPositions() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + objs.add(blobWithSize(100)); + objs.add(blobWithSize(200)); + objs.add(blobWithSize(400)); + objs.add(blobWithSize(1500)); + + writer.write(objs); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in); + assertEquals(100, index.getSize(0)); + assertEquals(200, index.getSize(1)); + assertEquals(400, index.getSize(2)); + assertEquals(1500, index.getSize(3)); + assertEquals(0, index.getThreshold()); + } + + @Test + public void read_only32bitsPositions() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 2000); + List objs = new ArrayList<>(); + PackedObjectInfo smallObj = blobWithSize(100); + for (int i = 0; i <= MAX_24BITS_UINT; i++) { + objs.add(smallObj); + } + objs.add(blobWithSize(1000)); + objs.add(blobWithSize(3000)); + objs.add(blobWithSize(2500)); + objs.add(blobWithSize(1000)); + + writer.write(objs); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in); + assertEquals(-1, index.getSize(5)); + assertEquals(-1, index.getSize(MAX_24BITS_UINT+1)); + assertEquals(3000, index.getSize(MAX_24BITS_UINT+2)); + assertEquals(2500, index.getSize(MAX_24BITS_UINT+3)); + assertEquals(-1, index.getSize(MAX_24BITS_UINT+4)); // Not indexed + assertEquals(2000, index.getThreshold()); + } + + @Test + public void read_24and32BitsPositions() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 2000); + List objs = new ArrayList<>(); + PackedObjectInfo smallObj = blobWithSize(100); + for (int i = 0; i <= MAX_24BITS_UINT; i++) { + if (i == 500 || i == 1000 || i == 1500) { + objs.add(blobWithSize(2500)); + continue; + } + objs.add(smallObj); + } + objs.add(blobWithSize(3000)); + objs.add(blobWithSize(1000)); + objs.add(blobWithSize(2500)); + objs.add(blobWithSize(1000)); + + writer.write(objs); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in); + // 24 bit positions + assertEquals(-1, index.getSize(5)); + assertEquals(2500, index.getSize(500)); + // 32 bit positions + assertEquals(3000, index.getSize(MAX_24BITS_UINT+1)); + assertEquals(-1, index.getSize(MAX_24BITS_UINT+2)); + assertEquals(2500, index.getSize(MAX_24BITS_UINT+3)); + assertEquals(-1, index.getSize(MAX_24BITS_UINT+4)); // Not indexed + assertEquals(2000, index.getThreshold()); + } + + @Test + public void read_only64bits() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, 0); + List objs = new ArrayList<>(); + objs.add(blobWithSize(3 * GB)); + objs.add(blobWithSize(8 * GB)); + + writer.write(objs); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in); + assertEquals(3 * GB, index.getSize(0)); + assertEquals(8 * GB, index.getSize(1)); + assertEquals(0, index.getThreshold()); + } + + @Test + public void read_withMinSize() throws IOException { + int minSize = 1000; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter + .createWriter(out, minSize); + List objs = new ArrayList<>(); + objs.add(blobWithSize(3 * GB)); + objs.add(blobWithSize(1500)); + objs.add(blobWithSize(500)); + objs.add(blobWithSize(1000)); + objs.add(blobWithSize(2000)); + + writer.write(objs); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in); + assertEquals(3 * GB, index.getSize(0)); + assertEquals(1500, index.getSize(1)); + assertEquals(-1, index.getSize(2)); + assertEquals(1000, index.getSize(3)); + assertEquals(2000, index.getSize(4)); + assertEquals(minSize, index.getThreshold()); + } + + private static PackedObjectInfo blobWithSize(long size) { + return objInfo(Constants.OBJ_BLOB, size); + } + + private static PackedObjectInfo objInfo(int type, long size) { + PackedObjectInfo objectInfo = new PackedObjectInfo(OBJ_ID); + objectInfo.setType(type); + objectInfo.setFullSize(size); + return objectInfo; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndex.java new file mode 100644 index 000000000..1c3797c50 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndex.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +/** + * Index of object sizes in a pack + * + * It is not guaranteed that the implementation contains the sizes of all + * objects (e.g. it could store only objects over certain threshold). + */ +public interface PackObjectSizeIndex { + + /** + * Returns the inflated size of the object. + * + * @param idxOffset + * position in the pack (as returned from PackIndex) + * @return size of the object, -1 if not found in the index. + */ + long getSize(int idxOffset); + + /** + * Number of objects in the index + * + * @return number of objects in the index + */ + long getObjectCount(); + + + /** + * Minimal size of an object to be included in this index + * + * Cut-off value used at generation time to decide what objects to index. + * + * @return size in bytes + */ + int getThreshold(); +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexLoader.java new file mode 100644 index 000000000..9d6941823 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexLoader.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** + * Chooses the specific implementation of the object-size index based on the + * file version. + */ +public class PackObjectSizeIndexLoader { + + /** + * Read an object size index from the stream + * + * @param in + * input stream at the beginning of the object size data + * @return an implementation of the object size index + * @throws IOException + * error reading the streams + */ + public static PackObjectSizeIndex load(InputStream in) throws IOException { + byte[] header = in.readNBytes(4); + if (!Arrays.equals(header, PackObjectSizeIndexWriter.HEADER)) { + throw new IOException("Stream is not an object index"); //$NON-NLS-1$ + } + + int version = in.readNBytes(1)[0]; + if (version != 1) { + throw new IOException("Unknown object size version: " + version); //$NON-NLS-1$ + } + return PackObjectSizeIndexV1.parse(in); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexV1.java new file mode 100644 index 000000000..68befc6ca --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexV1.java @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; + +import org.eclipse.jgit.util.NB; + +/** + * Memory representation of the object-size index + * + * The object size index is a map from position in the primary idx (i.e. + * position of the object-id in lexicographical order) to size. + * + * Most of the positions fit in unsigned 3 bytes (up to 16 million) + */ +class PackObjectSizeIndexV1 implements PackObjectSizeIndex { + + private static final byte BITS_24 = 0x18; + + private static final byte BITS_32 = 0x20; + + private final int threshold; + + private final UInt24Array positions24; + + private final int[] positions32; + + /** + * Parallel array to concat(positions24, positions32) with the size of the + * objects. + * + * A value >= 0 is the size of the object. A negative value means the size + * doesn't fit in an int and |value|-1 is the position for the size in the + * size64 array e.g. a value of -1 is sizes64[0], -2 = sizes64[1], ... + */ + private final int[] sizes32; + + private final long[] sizes64; + + static PackObjectSizeIndex parse(InputStream in) throws IOException { + /** Header and version already out of the input */ + IndexInputStreamReader stream = new IndexInputStreamReader(in); + int threshold = stream.readInt(); // minSize + int objCount = stream.readInt(); + if (objCount == 0) { + return new EmptyPackObjectSizeIndex(threshold); + } + return new PackObjectSizeIndexV1(stream, threshold, objCount); + } + + private PackObjectSizeIndexV1(IndexInputStreamReader stream, int threshold, + int objCount) throws IOException { + this.threshold = threshold; + UInt24Array pos24 = null; + int[] pos32 = null; + + byte positionEncoding; + while ((positionEncoding = stream.readByte()) != 0) { + if (Byte.compareUnsigned(positionEncoding, BITS_24) == 0) { + int sz = stream.readInt(); + pos24 = new UInt24Array(stream.readNBytes(sz * 3)); + } else if (Byte.compareUnsigned(positionEncoding, BITS_32) == 0) { + int sz = stream.readInt(); + pos32 = stream.readIntArray(sz); + } else { + throw new UnsupportedEncodingException( + String.format("Unknown position encoding %s", + Integer.toHexString(positionEncoding))); + } + } + positions24 = pos24 != null ? pos24 : UInt24Array.EMPTY; + positions32 = pos32 != null ? pos32 : new int[0]; + + sizes32 = stream.readIntArray(objCount); + int c64sizes = stream.readInt(); + if (c64sizes == 0) { + sizes64 = new long[0]; + return; + } + sizes64 = stream.readLongArray(c64sizes); + int c128sizes = stream.readInt(); + if (c128sizes != 0) { + // this MUST be 0 (we don't support 128 bits sizes yet) + throw new IOException("Unsupported sizes in object-size-index"); + } + } + + @Override + public long getSize(int idxOffset) { + int pos = -1; + if (!positions24.isEmpty() && idxOffset <= positions24.getLastValue()) { + pos = positions24.binarySearch(idxOffset); + } else if (positions32.length > 0 && idxOffset >= positions32[0]) { + int pos32 = Arrays.binarySearch(positions32, idxOffset); + if (pos32 >= 0) { + pos = pos32 + positions24.size(); + } + } + if (pos < 0) { + return -1; + } + + int objSize = sizes32[pos]; + if (objSize < 0) { + int secondPos = Math.abs(objSize) - 1; + return sizes64[secondPos]; + } + return objSize; + } + + @Override + public long getObjectCount() { + return positions24.size() + positions32.length; + } + + @Override + public int getThreshold() { + return threshold; + } + + /** + * Wrapper to read parsed content from the byte stream + */ + private static class IndexInputStreamReader { + + private final byte[] buffer = new byte[8]; + + private final InputStream in; + + IndexInputStreamReader(InputStream in) { + this.in = in; + } + + int readInt() throws IOException { + int n = in.readNBytes(buffer, 0, 4); + if (n < 4) { + throw new IOException( + "Unable to read a full int from the stream"); + } + return NB.decodeInt32(buffer, 0); + } + + int[] readIntArray(int intsCount) throws IOException { + if (intsCount == 0) { + return new int[0]; + } + + int[] dest = new int[intsCount]; + for (int i = 0; i < intsCount; i++) { + dest[i] = readInt(); + } + return dest; + } + + long readLong() throws IOException { + int n = in.readNBytes(buffer, 0, 8); + if (n < 8) { + throw new IOException( + "Unable to read a full int from the stream"); + } + return NB.decodeInt64(buffer, 0); + } + + long[] readLongArray(int longsCount) throws IOException { + if (longsCount == 0) { + return new long[0]; + } + + long[] dest = new long[longsCount]; + for (int i = 0; i < longsCount; i++) { + dest[i] = readLong(); + } + return dest; + } + + byte readByte() throws IOException { + int n = in.readNBytes(buffer, 0, 1); + if (n != 1) { + throw new IOException("Cannot read byte from stream"); + } + return buffer[0]; + } + + byte[] readNBytes(int sz) throws IOException { + return in.readNBytes(sz); + } + } + + private static class EmptyPackObjectSizeIndex + implements PackObjectSizeIndex { + + private final int threshold; + + EmptyPackObjectSizeIndex(int threshold) { + this.threshold = threshold; + } + + @Override + public long getSize(int idxOffset) { + return -1; + } + + @Override + public long getObjectCount() { + return 0; + } + + @Override + public int getThreshold() { + return threshold; + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexWriter.java new file mode 100644 index 000000000..ae03797ca --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackObjectSizeIndexWriter.java @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2022, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; + +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.transport.PackedObjectInfo; +import org.eclipse.jgit.util.NB; + +/** + * Write an object index in the output stream + */ +public abstract class PackObjectSizeIndexWriter { + + private static final int MAX_24BITS_UINT = 0xffffff; + + private static final PackObjectSizeIndexWriter NULL_WRITER = new PackObjectSizeIndexWriter() { + @Override + public void write(List objs) { + // Do nothing + } + }; + + /** Magic constant for the object size index file */ + protected static final byte[] HEADER = { -1, 's', 'i', 'z' }; + + /** + * Returns a writer for the latest index version + * + * @param os + * Output stream where to write the index + * @param minSize + * objects strictly smaller than this size won't be added to the + * index. Negative size won't write AT ALL. Other sizes could write + * an empty index. + * @return the index writer + */ + public static PackObjectSizeIndexWriter createWriter(OutputStream os, + int minSize) { + if (minSize < 0) { + return NULL_WRITER; + } + return new PackObjectSizeWriterV1(os, minSize); + } + + /** + * Add the objects to the index + * + * @param objs + * objects in the pack, in sha1 order. Their position in the list + * matches their position in the primary index. + * @throws IOException + * problem writing to the stream + */ + public abstract void write(List objs) + throws IOException; + + /** + * Object size index v1. + * + * Store position (in the main index) to size as parallel arrays. + * + *

Positions in the main index fit well in unsigned 24 bits (16M) for most + * repositories, but some outliers have even more objects, so we need to + * store also 32 bits positions. + * + *

Sizes are stored as a first array parallel to positions. If a size + * doesn't fit in an element of that array, then we encode there a position + * on the next-size array. This "overflow" array doesn't have entries for + * all positions. + * + *

+	 *
+	 *      positions       [10, 500, 1000, 1001]
+	 *      sizes (32bits)  [15MB, -1, 6MB, -2]
+   *                          ___/  ______/
+	 *                        /     /
+	 *      sizes (64 bits) [3GB, 6GB]
+	 * 
+ * + *

For sizes we use 32 bits as the first level and 64 for the rare objects + * over 2GB. + * + *

A 24/32/64 bits hierarchy of arrays saves space if we have a lot of small + * objects, but wastes space if we have only big ones. The min size to index is + * controlled by conf and in principle we want to index only rather + * big objects (e.g. > 10MB). We could support more dynamics read/write of sizes + * (e.g. 24 only if the threshold will include many of those objects) but it + * complicates a lot code and spec. If needed it could go for a v2 of the protocol. + * + *

Format: + * + *

  • A header with the magic number (4 bytes) + *
  • The index version (1 byte) + *
  • The minimum object size (4 bytes) + *
  • Total count of objects indexed (C, 4 bytes) + * (if count == 0, stop here) + * + * Blocks of + *
  • Size per entry in bits (1 byte, either 24 (0x18) or 32 (0x20)) + *
  • Count of entries (4 bytes) (c, as a signed int) + *
  • positions encoded in s bytes each (i.e s*c bytes) + * + *
  • 0 (as a "size-per-entry = 0", marking end of the section) + * + *
  • 32 bit sizes (C * 4 bytes). Negative size means + * nextLevel[abs(size)-1] + *
  • Count of 64 bit sizes (s64) (or 0 if no more indirections) + *
  • 64 bit sizes (s64 * 8 bytes) + *
  • 0 (end) + */ + static class PackObjectSizeWriterV1 extends PackObjectSizeIndexWriter { + + private final OutputStream os; + + private final int minObjSize; + + private final byte[] intBuffer = new byte[4]; + + PackObjectSizeWriterV1(OutputStream os, int minSize) { + this.os = new BufferedOutputStream(os); + this.minObjSize = minSize; + } + + @Override + public void write(List allObjects) + throws IOException { + os.write(HEADER); + writeUInt8(1); // Version + writeInt32(minObjSize); + + PackedObjectStats stats = countIndexableObjects(allObjects); + int[] indexablePositions = findIndexablePositions(allObjects, + stats.indexableObjs); + writeInt32(indexablePositions.length); // Total # of objects + if (indexablePositions.length == 0) { + os.flush(); + return; + } + + // Positions that fit in 3 bytes + if (stats.pos24Bits > 0) { + writeUInt8(24); + writeInt32(stats.pos24Bits); + applyToRange(indexablePositions, 0, stats.pos24Bits, + this::writeInt24); + } + // Positions that fit in 4 bytes + // We only use 31 bits due to sign, + // but that covers 2 billion objs + if (stats.pos31Bits > 0) { + writeUInt8(32); + writeInt32(stats.pos31Bits); + applyToRange(indexablePositions, stats.pos24Bits, + stats.pos24Bits + stats.pos31Bits, this::writeInt32); + } + writeUInt8(0); + writeSizes(allObjects, indexablePositions, stats.sizeOver2GB); + os.flush(); + } + + private void writeUInt8(int i) throws IOException { + if (i > 255) { + throw new IllegalStateException( + "Number doesn't fit in a single byte"); + } + NB.encodeInt32(intBuffer, 0, i); + os.write(intBuffer, 3, 1); + } + + private void writeInt24(int i) throws IOException { + NB.encodeInt24(intBuffer, 1, i); + os.write(intBuffer, 1, 3); + } + + private void writeInt32(int i) throws IOException { + NB.encodeInt32(intBuffer, 0, i); + os.write(intBuffer); + } + + private void writeSizes(List allObjects, + int[] indexablePositions, int objsBiggerThan2Gb) + throws IOException { + if (indexablePositions.length == 0) { + writeInt32(0); + return; + } + + byte[] sizes64bits = new byte[8 * objsBiggerThan2Gb]; + int s64 = 0; + for (int i = 0; i < indexablePositions.length; i++) { + PackedObjectInfo info = allObjects.get(indexablePositions[i]); + if (info.getFullSize() < Integer.MAX_VALUE) { + writeInt32((int) info.getFullSize()); + } else { + // Size needs more than 32 bits. Store -1 * offset in the + // next table as size. + writeInt32(-1 * (s64 + 1)); + NB.encodeInt64(sizes64bits, s64 * 8, info.getFullSize()); + s64++; + } + } + if (objsBiggerThan2Gb > 0) { + writeInt32(objsBiggerThan2Gb); + os.write(sizes64bits); + } + writeInt32(0); + } + + private int[] findIndexablePositions( + List allObjects, + int indexableObjs) { + int[] positions = new int[indexableObjs]; + int positionIdx = 0; + for (int i = 0; i < allObjects.size(); i++) { + PackedObjectInfo o = allObjects.get(i); + if (!shouldIndex(o)) { + continue; + } + positions[positionIdx++] = i; + } + return positions; + } + + private PackedObjectStats countIndexableObjects( + List objs) { + PackedObjectStats stats = new PackedObjectStats(); + for (int i = 0; i < objs.size(); i++) { + PackedObjectInfo o = objs.get(i); + if (!shouldIndex(o)) { + continue; + } + stats.indexableObjs++; + if (o.getFullSize() > Integer.MAX_VALUE) { + stats.sizeOver2GB++; + } + if (i <= MAX_24BITS_UINT) { + stats.pos24Bits++; + } else { + stats.pos31Bits++; + // i is a positive int, cannot be bigger than this + } + } + return stats; + } + + private boolean shouldIndex(PackedObjectInfo o) { + return (o.getType() == Constants.OBJ_BLOB) + && (o.getFullSize() >= minObjSize); + } + + private static class PackedObjectStats { + int indexableObjs; + + int pos24Bits; + + int pos31Bits; + + int sizeOver2GB; + } + + @FunctionalInterface + interface IntEncoder { + void encode(int i) throws IOException; + } + + private static void applyToRange(int[] allPositions, int start, int end, + IntEncoder encoder) throws IOException { + for (int i = start; i < end; i++) { + encoder.encode(allPositions[i]); + } + } + } +}