diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsReaderTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsReaderTest.java new file mode 100644 index 000000000..2e18a7589 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsReaderTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2023, Google LLC. and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.dfs; + +import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX; +import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_PACK_SECTION; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.eclipse.jgit.junit.JGitTestUtil; +import org.eclipse.jgit.junit.TestRng; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectInserter; +import org.junit.Before; +import org.junit.Test; + +public class DfsReaderTest { + InMemoryRepository db; + + @Before + public void setUp() { + db = new InMemoryRepository(new DfsRepositoryDescription("test")); + } + + @Test + public void isNotLargerThan_noObjectSizeIndex() throws IOException { + setObjectSizeIndexMinBytes(-1); + ObjectId obj = insertBlobWithSize(10); + try (DfsReader ctx = db.getObjectDatabase().newReader()) { + assertFalse(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 0)); + assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 10)); + assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 40)); + assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 50)); + assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 100)); + + assertEquals(5, ctx.stats.isNotLargerThanCallCount); + assertEquals(0, ctx.stats.objectSizeIndexMiss); + assertEquals(0, ctx.stats.objectSizeIndexHit); + } + } + + private ObjectId insertBlobWithSize(int size) + throws IOException { + TestRng testRng = new TestRng(JGitTestUtil.getName()); + ObjectId oid; + try (ObjectInserter ins = db.newObjectInserter()) { + oid = ins.insert(Constants.OBJ_BLOB, + testRng.nextBytes(size)); + ins.flush(); + } + return oid; + } + + private void setObjectSizeIndexMinBytes(int threshold) { + db.getConfig().setInt(CONFIG_PACK_SECTION, null, + CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX, threshold); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java index f61f8c61a..1f54795de 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java @@ -16,6 +16,7 @@ import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.COMMIT_GRAPH; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; +import static org.eclipse.jgit.internal.storage.pack.PackExt.OBJECT_SIZE_INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; import static org.eclipse.jgit.internal.storage.pack.PackExt.REVERSE_INDEX; @@ -42,6 +43,8 @@ import org.eclipse.jgit.internal.storage.commitgraph.CommitGraphLoader; import org.eclipse.jgit.internal.storage.file.PackBitmapIndex; import org.eclipse.jgit.internal.storage.file.PackIndex; +import org.eclipse.jgit.internal.storage.file.PackObjectSizeIndex; +import org.eclipse.jgit.internal.storage.file.PackObjectSizeIndexLoader; import org.eclipse.jgit.internal.storage.file.PackReverseIndex; import org.eclipse.jgit.internal.storage.file.PackReverseIndexFactory; import org.eclipse.jgit.internal.storage.pack.BinaryDelta; @@ -76,6 +79,11 @@ public final class DfsPackFile extends BlockBasedFile { /** Index of compressed commit graph mapping entire object graph. */ private volatile CommitGraph commitGraph; + /** Index by size */ + private boolean objectSizeIndexLoadAttempted; + + private volatile PackObjectSizeIndex objectSizeIndex; + /** * Objects we have tried to read, and discovered to be corrupt. *

@@ -292,6 +300,39 @@ public PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException { return reverseIndex; } + private PackObjectSizeIndex getObjectSizeIndex(DfsReader ctx) + throws IOException { + if (objectSizeIndex != null) { + return objectSizeIndex; + } + + if (objectSizeIndexLoadAttempted) { + // Pack doesn't have object size index + return null; + } + + DfsStreamKey objSizeKey = desc.getStreamKey(OBJECT_SIZE_INDEX); + AtomicBoolean cacheHit = new AtomicBoolean(true); + try { + DfsBlockCache.Ref sizeIdxRef = cache + .getOrLoadRef(objSizeKey, REF_POSITION, () -> { + cacheHit.set(false); + return loadObjectSizeIndex(ctx, objSizeKey); + }); + if (cacheHit.get()) { + ctx.stats.objectSizeIndexCacheHit++; + } + PackObjectSizeIndex sizeIdx = sizeIdxRef.get(); + if (sizeIdx != null) { + objectSizeIndex = sizeIdx; + } + } finally { + objectSizeIndexLoadAttempted = true; + } + + return objectSizeIndex; + } + /** * Check if an object is stored within this pack. * @@ -968,6 +1009,81 @@ long getObjectSize(DfsReader ctx, long pos) } } + /** + * Return if this packfile has an object size index attached. + * + * This loads the index if it is not already in memory. + * + * @param ctx + * reader context to support reading from the backing store if + * the object size index is not already loaded in memory. + * @return true if the packfile has an object size index. + * @throws IOException + * a problem accessing storage while looking for the index + */ + boolean hasObjectSizeIndex(DfsReader ctx) throws IOException { + return getObjectSizeIndex(ctx) != null; + } + + /** + * Return minimum size for an object to be included in the object size + * index. + * + * Caller must make sure the pack has an object size index with + * {@link #hasObjectSizeIndex} before calling this method. + * + * @param ctx + * reader context to support reading from the backing store if + * the object size index is not already loaded in memory. + * @return minimum size for indexing in bytes + * @throws IOException + * no object size index or a problem loading it. + */ + int getObjectSizeIndexThreshold(DfsReader ctx) throws IOException { + PackObjectSizeIndex index = getObjectSizeIndex(ctx); + if (index == null) { + throw new IOException("Asking threshold of non-existing obj-size"); //$NON-NLS-1$ + } + return index.getThreshold(); + } + + /** + * Return the size of the object from the object-size index. The object + * should be a blob. Any other type is not indexed and returns -1. + * + * Caller MUST be sure that the object is in the pack (e.g. with + * {@link #hasObject(DfsReader, AnyObjectId)}) and the pack has object size + * index (e.g. with {@link #hasObjectSizeIndex(DfsReader)}) before asking + * the indexed size. + * + * @param ctx + * reader context to support reading from the backing store if + * the object size index is not already loaded in memory. + * @param id + * object id of an object in the pack + * @return size of the object from the index. Negative if object is not in + * the index (below threshold or not a blob) + * @throws IOException + * could not read the object size index. IO problem or the pack + * doesn't have it. + */ + long getIndexedObjectSize(DfsReader ctx, AnyObjectId id) + throws IOException { + int idxPosition = idx(ctx).findPosition(id); + if (idxPosition < 0) { + throw new IllegalArgumentException( + "Cannot get size from index since object is not in pack"); //$NON-NLS-1$ + } + + PackObjectSizeIndex sizeIdx = getObjectSizeIndex(ctx); + if (sizeIdx == null) { + throw new IllegalStateException( + "Asking indexed size from a pack without object size index"); //$NON-NLS-1$ + } + + return sizeIdx.getSize(idxPosition); + } + void representation(DfsObjectRepresentation r, final long pos, DfsReader ctx, PackReverseIndex rev) throws IOException { @@ -1089,6 +1205,39 @@ private DfsBlockCache.Ref loadReverseIdx( revidx); } + private DfsBlockCache.Ref loadObjectSizeIndex( + DfsReader ctx, DfsStreamKey objectSizeIndexKey) throws IOException { + ctx.stats.readObjectSizeIndex++; + long start = System.nanoTime(); + long size = 0; + Exception parsingError = null; + try (ReadableChannel rc = ctx.db.openFile(desc, OBJECT_SIZE_INDEX)) { + try { + objectSizeIndex = PackObjectSizeIndexLoader + .load(Channels.newInputStream(rc)); + size = rc.position(); + } catch (IOException e) { + parsingError = e; + } + } catch (IOException e) { + // No object size index in this pack + return new DfsBlockCache.Ref<>(objectSizeIndexKey, REF_POSITION, 0, + null); + } + + if (parsingError != null) { + throw new IOException( + MessageFormat.format(DfsText.get().shortReadOfIndex, + desc.getFileName(OBJECT_SIZE_INDEX)), + parsingError); + } + + ctx.stats.readObjectSizeIndexBytes += size; + ctx.stats.readObjectSizeIndexMicros += elapsedMicros(start); + return new DfsBlockCache.Ref<>(objectSizeIndexKey, REF_POSITION, size, + objectSizeIndex); + } + private DfsBlockCache.Ref loadBitmapIndex(DfsReader ctx, DfsStreamKey bitmapKey) throws IOException { ctx.stats.readBitmap++; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java index 56fe5d508..d98ec5856 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java @@ -49,6 +49,7 @@ import org.eclipse.jgit.lib.AsyncObjectSizeQueue; import org.eclipse.jgit.lib.BitmapIndex; import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; +import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.InflaterCache; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectLoader; @@ -511,6 +512,59 @@ public long getObjectSize(AnyObjectId objectId, int typeHint) throw new MissingObjectException(objectId.copy(), typeHint); } + + @Override + public boolean isNotLargerThan(AnyObjectId objectId, int typeHint, + long limit) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + DfsPackFile pack = findPackWithObject(objectId); + if (pack == null) { + if (typeHint == OBJ_ANY) { + throw new MissingObjectException(objectId.copy(), + JGitText.get().unknownObjectType2); + } + throw new MissingObjectException(objectId.copy(), typeHint); + } + + stats.isNotLargerThanCallCount += 1; + if (typeHint != Constants.OBJ_BLOB || !pack.hasObjectSizeIndex(this)) { + return pack.getObjectSize(this, objectId) <= limit; + } + + long sz = pack.getIndexedObjectSize(this, objectId); + if (sz < 0) { + stats.objectSizeIndexMiss += 1; + } else { + stats.objectSizeIndexHit += 1; + } + + // Got size from index or we didn't but we are sure it should be there. + if (sz >= 0 || pack.getObjectSizeIndexThreshold(this) <= limit) { + return sz <= limit; + } + + return pack.getObjectSize(this, objectId) <= limit; + } + + private DfsPackFile findPackWithObject(AnyObjectId objectId) + throws IOException { + if (last != null && !skipGarbagePack(last) + && last.hasObject(this, objectId)) { + return last; + } + PackList packList = db.getPackList(); + // hasImpl doesn't check "last", but leaves "last" pointing to the pack + // with the object + if (hasImpl(packList, objectId)) { + return last; + } else if (packList.dirty()) { + if (hasImpl(db.getPackList(), objectId)) { + return last; + } + } + return null; + } + private long getObjectSizeImpl(PackList packList, AnyObjectId objectId) throws IOException { for (DfsPackFile pack : packList.packs) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReaderIoStats.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReaderIoStats.java index 5ac7985e9..adb4673ae 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReaderIoStats.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReaderIoStats.java @@ -10,12 +10,15 @@ package org.eclipse.jgit.internal.storage.dfs; +import org.eclipse.jgit.lib.AnyObjectId; + /** * IO statistics for a {@link org.eclipse.jgit.internal.storage.dfs.DfsReader}. */ public class DfsReaderIoStats { /** POJO to accumulate IO statistics. */ public static class Accumulator { + /** Number of times the reader explicitly called scanPacks. */ long scanPacks; @@ -31,6 +34,9 @@ public static class Accumulator { /** Total number of cache hits for commit graphs. */ long commitGraphCacheHit; + /** Total number of cache hits for object size indexes. */ + long objectSizeIndexCacheHit; + /** Total number of complete pack indexes read into memory. */ long readIdx; @@ -43,12 +49,18 @@ public static class Accumulator { /** Total number of complete commit graphs read into memory. */ long readCommitGraph; + /** Total number of object size indexes added into memory. */ + long readObjectSizeIndex; + /** Total number of bytes read from pack indexes. */ long readIdxBytes; /** Total number of bytes read from commit graphs. */ long readCommitGraphBytes; + /** Total numer of bytes read from object size index */ + long readObjectSizeIndexBytes; + /** Total microseconds spent reading pack indexes. */ long readIdxMicros; @@ -58,6 +70,9 @@ public static class Accumulator { /** Total microseconds spent creating commit graphs. */ long readCommitGraphMicros; + /** Total microseconds spent creating object size indexes */ + long readObjectSizeIndexMicros; + /** Total number of bytes read from bitmap indexes. */ long readBitmapIdxBytes; @@ -88,6 +103,15 @@ public static class Accumulator { /** Total microseconds spent inflating compressed bytes. */ long inflationMicros; + /** Count of queries for the size of an object via #isNotLargerThan */ + long isNotLargerThanCallCount; + + /** Object was below threshold in the object size index */ + long objectSizeIndexMiss; + + /** Object size found in the object size index */ + long objectSizeIndexHit; + Accumulator() { } } @@ -143,6 +167,15 @@ public long getCommitGraphCacheHits() { return stats.commitGraphCacheHit; } + /** + * Get total number of object size index cache hits. + * + * @return total number of object size index cache hits. + */ + public long getObjectSizeIndexCacheHits() { + return stats.objectSizeIndexCacheHit; + } + /** * Get total number of complete pack indexes read into memory. * @@ -179,6 +212,15 @@ public long getReadBitmapIndexCount() { return stats.readBitmap; } + /** + * Get total number of complete object size indexes read into memory. + * + * @return total number of complete object size indexes read into memory. + */ + public long getReadObjectSizeIndexCount() { + return stats.readObjectSizeIndex; + } + /** * Get total number of bytes read from pack indexes. * @@ -297,4 +339,41 @@ public long getInflatedBytes() { public long getInflationMicros() { return stats.inflationMicros; } + + /** + * Get count of invocations to + * {@link DfsReader#isNotLargerThan(AnyObjectId, int, long)} + *

+ * Each call could use the object-size index or not. + * + * @return how many times the size of an object was checked with + * {@link DfsReader#isNotLargerThan(AnyObjectId, int, long)} + */ + public long getIsNotLargerThanCallCount() { + return stats.isNotLargerThanCallCount; + } + + /** + * Get number of times the size of a blob was found in the object size + * index. + *

+ * This counts only queries for blobs on packs with object size index. + * + * @return count of object size index hits + */ + public long getObjectSizeIndexHits() { + return stats.objectSizeIndexHit; + } + + /** + * Get number of times the size of an object was not found in the object + * size index. This usually means it was below the threshold. + *

+ * This counts only queries for blobs on packs with object size index. + * + * @return count of object size index misses. + */ + public long getObjectSizeIndexMisses() { + return stats.objectSizeIndexMiss; + } }