DfsReader/PackFile: Implement isNotLargerThan using the obj size idx

isNotLargerThan() can avoid reading the size of a blob from disk using
the object size idx if available.

Load the object size index in the DfsPackFile following the same
pattern as the other indices. Override isNotLargerThan in DfsReader
to use the index when available.

A following CL introduces the writing of the object size index, and its
tests cover this code.

Change-Id: I15c95b84c1424707c487a7d29c5c46b1a9d0ceba
This commit is contained in:
Ivan Frade 2021-12-30 08:33:08 -08:00
parent 8a053b57ad
commit 9dace2e6d6
4 changed files with 351 additions and 0 deletions

View File

@ -0,0 +1,69 @@
/*
* Copyright (C) 2023, Google LLC. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.dfs;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_PACK_SECTION;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import org.eclipse.jgit.junit.JGitTestUtil;
import org.eclipse.jgit.junit.TestRng;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@link DfsReader#isNotLargerThan} when the pack lacks an
 * object size index.
 */
public class DfsReaderTest {
	InMemoryRepository db;

	@Before
	public void setUp() {
		db = new InMemoryRepository(new DfsRepositoryDescription("test"));
	}

	@Test
	public void isNotLargerThan_noObjectSizeIndex() throws IOException {
		setObjectSizeIndexMinBytes(-1);
		ObjectId blob = insertBlobWithSize(10);
		try (DfsReader reader = db.getObjectDatabase().newReader()) {
			// 10-byte blob: only limits >= 10 satisfy isNotLargerThan.
			assertFalse(reader.isNotLargerThan(blob, Constants.OBJ_BLOB, 0));
			assertTrue(reader.isNotLargerThan(blob, Constants.OBJ_BLOB, 10));
			assertTrue(reader.isNotLargerThan(blob, Constants.OBJ_BLOB, 40));
			assertTrue(reader.isNotLargerThan(blob, Constants.OBJ_BLOB, 50));
			assertTrue(reader.isNotLargerThan(blob, Constants.OBJ_BLOB, 100));

			// Every call is counted; without an index there can be neither
			// index hits nor index misses.
			assertEquals(5, reader.stats.isNotLargerThanCallCount);
			assertEquals(0, reader.stats.objectSizeIndexMiss);
			assertEquals(0, reader.stats.objectSizeIndexHit);
		}
	}

	// Insert a blob of exactly {@code size} random bytes and return its id.
	private ObjectId insertBlobWithSize(int size) throws IOException {
		TestRng rng = new TestRng(JGitTestUtil.getName());
		try (ObjectInserter inserter = db.newObjectInserter()) {
			ObjectId blobId = inserter.insert(Constants.OBJ_BLOB,
					rng.nextBytes(size));
			inserter.flush();
			return blobId;
		}
	}

	// Configure the minimum object size for indexing; negative disables
	// the object size index entirely.
	private void setObjectSizeIndexMinBytes(int threshold) {
		db.getConfig().setInt(CONFIG_PACK_SECTION, null,
				CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX, threshold);
	}
}

View File

@ -16,6 +16,7 @@
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.COMMIT_GRAPH;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.OBJECT_SIZE_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REVERSE_INDEX;
@ -42,6 +43,8 @@
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraphLoader;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackObjectSizeIndex;
import org.eclipse.jgit.internal.storage.file.PackObjectSizeIndexLoader;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndexFactory;
import org.eclipse.jgit.internal.storage.pack.BinaryDelta;
@ -76,6 +79,11 @@ public final class DfsPackFile extends BlockBasedFile {
/** Index of compressed commit graph mapping entire object graph. */
private volatile CommitGraph commitGraph;
/** Index by size */
private boolean objectSizeIndexLoadAttempted;
private volatile PackObjectSizeIndex objectSizeIndex;
/**
* Objects we have tried to read, and discovered to be corrupt.
* <p>
@ -292,6 +300,39 @@ public PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException {
return reverseIndex;
}
/**
 * Get the object size index for this pack, loading it on first use.
 * <p>
 * The index is looked up in the block cache first and read from storage
 * on a miss. Packs without an object size index are remembered via
 * {@code objectSizeIndexLoadAttempted} so storage is probed only once.
 * NOTE(review): the flag is also set when loading throws an IOException
 * (the finally block runs), so a transient storage error makes later
 * calls report "no index" for the lifetime of this object — confirm
 * this is intended.
 *
 * @param ctx
 *            reader context to support reading from the backing store if
 *            the object size index is not already loaded in memory.
 * @return the object size index, or null if this pack has none.
 * @throws IOException
 *             a problem accessing storage while loading the index.
 */
private PackObjectSizeIndex getObjectSizeIndex(DfsReader ctx)
		throws IOException {
	if (objectSizeIndex != null) {
		return objectSizeIndex;
	}
	if (objectSizeIndexLoadAttempted) {
		// Pack doesn't have object size index
		return null;
	}
	DfsStreamKey objSizeKey = desc.getStreamKey(OBJECT_SIZE_INDEX);
	// The loader flips this to false when it actually runs; if it is
	// still true after getOrLoadRef, the ref came from the cache.
	AtomicBoolean cacheHit = new AtomicBoolean(true);
	try {
		DfsBlockCache.Ref<PackObjectSizeIndex> sizeIdxRef = cache
				.getOrLoadRef(objSizeKey, REF_POSITION, () -> {
					cacheHit.set(false);
					return loadObjectSizeIndex(ctx, objSizeKey);
				});
		if (cacheHit.get()) {
			ctx.stats.objectSizeIndexCacheHit++;
		}
		// A ref holding null means this pack has no object size index.
		PackObjectSizeIndex sizeIdx = sizeIdxRef.get();
		if (sizeIdx != null) {
			objectSizeIndex = sizeIdx;
		}
	} finally {
		objectSizeIndexLoadAttempted = true;
	}
	return objectSizeIndex;
}
/**
* Check if an object is stored within this pack.
*
@ -968,6 +1009,81 @@ long getObjectSize(DfsReader ctx, long pos)
}
}
/**
 * Whether this packfile has an object size index, loading it into memory
 * if it is not there yet.
 *
 * @param ctx
 *            reader context to support reading from the backing store if
 *            the object size index is not already loaded in memory.
 * @return true if the packfile has an object size index.
 * @throws IOException
 *             a problem accessing storage while looking for the index
 */
boolean hasObjectSizeIndex(DfsReader ctx) throws IOException {
	PackObjectSizeIndex index = getObjectSizeIndex(ctx);
	return index != null;
}
/**
 * Return minimum size for an object to be included in the object size
 * index.
 * <p>
 * Caller must make sure the pack has an object size index with
 * {@link #hasObjectSizeIndex} before calling this method.
 *
 * @param ctx
 *            reader context to support reading from the backing store if
 *            the object size index is not already loaded in memory.
 * @return minimum size for indexing in bytes
 * @throws IOException
 *             a problem accessing storage while loading the index.
 */
int getObjectSizeIndexThreshold(DfsReader ctx) throws IOException {
	PackObjectSizeIndex index = getObjectSizeIndex(ctx);
	if (index == null) {
		// Caller skipped hasObjectSizeIndex: this is a programming error,
		// not an I/O failure, so report it the same way as
		// getIndexedObjectSize does for the same precondition.
		throw new IllegalStateException(
				"Asking threshold of non-existing obj-size"); //$NON-NLS-1$
	}
	return index.getThreshold();
}
/**
 * Return the size of the object from the object-size index. The object
 * should be a blob. Any other type is not indexed and returns -1.
 * <p>
 * Caller MUST be sure that the object is in the pack (e.g. with
 * {@link #hasObject(DfsReader, AnyObjectId)}) and the pack has object size
 * index (e.g. with {@link #hasObjectSizeIndex(DfsReader)}) before asking
 * the indexed size.
 *
 * @param ctx
 *            reader context to support reading from the backing store if
 *            the object size index is not already loaded in memory.
 * @param id
 *            object id of an object in the pack
 * @return size of the object from the index. Negative if object is not in
 *         the index (below threshold or not a blob)
 * @throws IOException
 *             could not read the object size index. IO problem or the pack
 *             doesn't have it.
 */
long getIndexedObjectSize(DfsReader ctx, AnyObjectId id)
		throws IOException {
	int posInPackIdx = idx(ctx).findPosition(id);
	if (posInPackIdx < 0) {
		throw new IllegalArgumentException(
				"Cannot get size from index since object is not in pack"); //$NON-NLS-1$
	}

	PackObjectSizeIndex index = getObjectSizeIndex(ctx);
	if (index == null) {
		throw new IllegalStateException(
				"Asking indexed size from a pack without object size index"); //$NON-NLS-1$
	}
	return index.getSize(posInPackIdx);
}
void representation(DfsObjectRepresentation r, final long pos,
DfsReader ctx, PackReverseIndex rev)
throws IOException {
@ -1089,6 +1205,39 @@ private DfsBlockCache.Ref<PackReverseIndex> loadReverseIdx(
revidx);
}
/**
 * Read the object size index from storage and wrap it in a cache ref.
 * <p>
 * Invoked by the block cache on a miss (see getObjectSizeIndex). A pack
 * without the index file is represented by a ref holding null, so the
 * absence is cached too. Uses a local variable for the parsed index: the
 * volatile {@code objectSizeIndex} field must only be published by
 * getObjectSizeIndex after the ref is installed, not as a side effect of
 * the loader.
 *
 * @param ctx
 *            reader to access storage and record IO statistics.
 * @param objectSizeIndexKey
 *            cache stream key for the object size index of this pack.
 * @return cache ref holding the parsed index, or null if the pack has no
 *         object size index file.
 * @throws IOException
 *             the index file exists but could not be parsed.
 */
private DfsBlockCache.Ref<PackObjectSizeIndex> loadObjectSizeIndex(
		DfsReader ctx, DfsStreamKey objectSizeIndexKey) throws IOException {
	ctx.stats.readObjectSizeIndex++;
	long start = System.nanoTime();
	long size = 0;
	PackObjectSizeIndex parsedIndex = null;
	Exception parsingError = null;
	try (ReadableChannel rc = ctx.db.openFile(desc, OBJECT_SIZE_INDEX)) {
		try {
			parsedIndex = PackObjectSizeIndexLoader
					.load(Channels.newInputStream(rc));
			size = rc.position();
		} catch (IOException e) {
			// Remember the parse failure; report it only after the channel
			// is closed, so open errors and parse errors stay distinct.
			parsingError = e;
		}
	} catch (IOException e) {
		// No object size index in this pack; cache the absence.
		return new DfsBlockCache.Ref<>(objectSizeIndexKey, REF_POSITION, 0,
				null);
	}
	if (parsingError != null) {
		throw new IOException(
				MessageFormat.format(DfsText.get().shortReadOfIndex,
						desc.getFileName(OBJECT_SIZE_INDEX)),
				parsingError);
	}
	ctx.stats.readObjectSizeIndexBytes += size;
	ctx.stats.readObjectSizeIndexMicros += elapsedMicros(start);
	return new DfsBlockCache.Ref<>(objectSizeIndexKey, REF_POSITION, size,
			parsedIndex);
}
private DfsBlockCache.Ref<PackBitmapIndex> loadBitmapIndex(DfsReader ctx,
DfsStreamKey bitmapKey) throws IOException {
ctx.stats.readBitmap++;

View File

@ -49,6 +49,7 @@
import org.eclipse.jgit.lib.AsyncObjectSizeQueue;
import org.eclipse.jgit.lib.BitmapIndex;
import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.InflaterCache;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
@ -511,6 +512,59 @@ public long getObjectSize(AnyObjectId objectId, int typeHint)
throw new MissingObjectException(objectId.copy(), typeHint);
}
/**
 * {@inheritDoc}
 * <p>
 * Answers from the pack's object size index when the object is a blob in
 * a pack that has one; otherwise falls back to reading the real object
 * size from pack data.
 */
@Override
public boolean isNotLargerThan(AnyObjectId objectId, int typeHint,
		long limit) throws MissingObjectException,
		IncorrectObjectTypeException, IOException {
	DfsPackFile pack = findPackWithObject(objectId);
	if (pack == null) {
		if (typeHint == OBJ_ANY) {
			throw new MissingObjectException(objectId.copy(),
					JGitText.get().unknownObjectType2);
		}
		throw new MissingObjectException(objectId.copy(), typeHint);
	}

	stats.isNotLargerThanCallCount += 1;
	// Only blobs are recorded in the object size index; for other types,
	// or packs without an index, measure the object directly.
	if (typeHint != Constants.OBJ_BLOB || !pack.hasObjectSizeIndex(this)) {
		return pack.getObjectSize(this, objectId) <= limit;
	}

	// Negative size: blob not in the index (below indexing threshold).
	long sz = pack.getIndexedObjectSize(this, objectId);
	if (sz < 0) {
		stats.objectSizeIndexMiss += 1;
	} else {
		stats.objectSizeIndexHit += 1;
	}

	// Got size from index or we didn't but we are sure it should be there.
	// When sz < 0 and threshold <= limit, any unindexed blob is smaller
	// than the threshold and hence not larger than limit, so the negative
	// sz still makes "sz <= limit" yield the right answer (true).
	if (sz >= 0 || pack.getObjectSizeIndexThreshold(this) <= limit) {
		return sz <= limit;
	}
	return pack.getObjectSize(this, objectId) <= limit;
}
/**
 * Find the pack containing the object, if any.
 * <p>
 * Checks the last-used pack first, then the current pack list, and
 * rescans with a fresh pack list when the current one is dirty.
 *
 * @param objectId
 *            object to locate.
 * @return the pack containing the object, or null if not found.
 * @throws IOException
 *             an error occurred searching the packs.
 */
private DfsPackFile findPackWithObject(AnyObjectId objectId)
		throws IOException {
	if (last != null && !skipGarbagePack(last)
			&& last.hasObject(this, objectId)) {
		return last;
	}
	PackList packList = db.getPackList();
	// hasImpl doesn't check "last", but leaves "last" pointing to the pack
	// with the object
	if (hasImpl(packList, objectId)) {
		return last;
	} else if (packList.dirty()) {
		// The scanned list may be stale; retry with a fresh one.
		if (hasImpl(db.getPackList(), objectId)) {
			return last;
		}
	}
	return null;
}
private long getObjectSizeImpl(PackList packList, AnyObjectId objectId)
throws IOException {
for (DfsPackFile pack : packList.packs) {

View File

@ -10,12 +10,15 @@
package org.eclipse.jgit.internal.storage.dfs;
import org.eclipse.jgit.lib.AnyObjectId;
/**
* IO statistics for a {@link org.eclipse.jgit.internal.storage.dfs.DfsReader}.
*/
public class DfsReaderIoStats {
/** POJO to accumulate IO statistics. */
public static class Accumulator {
/** Number of times the reader explicitly called scanPacks. */
long scanPacks;
@ -31,6 +34,9 @@ public static class Accumulator {
/** Total number of cache hits for commit graphs. */
long commitGraphCacheHit;
/** Total number of cache hits for object size indexes. */
long objectSizeIndexCacheHit;
/** Total number of complete pack indexes read into memory. */
long readIdx;
@ -43,12 +49,18 @@ public static class Accumulator {
/** Total number of complete commit graphs read into memory. */
long readCommitGraph;
/** Total number of object size indexes read into memory. */
long readObjectSizeIndex;
/** Total number of bytes read from pack indexes. */
long readIdxBytes;
/** Total number of bytes read from commit graphs. */
long readCommitGraphBytes;
/** Total number of bytes read from the object size index. */
long readObjectSizeIndexBytes;
/** Total microseconds spent reading pack indexes. */
long readIdxMicros;
@ -58,6 +70,9 @@ public static class Accumulator {
/** Total microseconds spent creating commit graphs. */
long readCommitGraphMicros;
/** Total microseconds spent creating object size indexes */
long readObjectSizeIndexMicros;
/** Total number of bytes read from bitmap indexes. */
long readBitmapIdxBytes;
@ -88,6 +103,15 @@ public static class Accumulator {
/** Total microseconds spent inflating compressed bytes. */
long inflationMicros;
/** Count of queries for the size of an object via #isNotLargerThan */
long isNotLargerThanCallCount;
/** Object was below threshold in the object size index */
long objectSizeIndexMiss;
/** Object size found in the object size index */
long objectSizeIndexHit;
Accumulator() {
}
}
@ -143,6 +167,15 @@ public long getCommitGraphCacheHits() {
return stats.commitGraphCacheHit;
}
/**
 * Get total number of object size index cache hits.
 * <p>
 * Incremented when the object size index was found in the block cache
 * instead of being read from storage.
 *
 * @return total number of object size index cache hits.
 */
public long getObjectSizeIndexCacheHits() {
	return stats.objectSizeIndexCacheHit;
}
/**
* Get total number of complete pack indexes read into memory.
*
@ -179,6 +212,15 @@ public long getReadBitmapIndexCount() {
return stats.readBitmap;
}
/**
 * Get total number of complete object size indexes read into memory.
 * <p>
 * Counts only actual reads from storage; cache hits are reported via
 * {@link #getObjectSizeIndexCacheHits()}.
 *
 * @return total number of complete object size indexes read into memory.
 */
public long getReadObjectSizeIndexCount() {
	return stats.readObjectSizeIndex;
}
/**
* Get total number of bytes read from pack indexes.
*
@ -297,4 +339,41 @@ public long getInflatedBytes() {
/**
 * Get total microseconds spent inflating compressed bytes.
 *
 * @return total microseconds spent inflating compressed bytes.
 */
public long getInflationMicros() {
	return stats.inflationMicros;
}
/**
 * Get count of invocations to
 * {@link DfsReader#isNotLargerThan(AnyObjectId, int, long)}.
 * <p>
 * Each call may or may not have been answered from the object-size
 * index.
 *
 * @return how many times the size of an object was checked with
 *         {@link DfsReader#isNotLargerThan(AnyObjectId, int, long)}.
 */
public long getIsNotLargerThanCallCount() {
	return stats.isNotLargerThanCallCount;
}
/**
 * Get number of times the size of a blob was found in the object size
 * index.
 * <p>
 * This counts only queries for blobs on packs with an object size index.
 *
 * @return count of object size index hits.
 */
public long getObjectSizeIndexHits() {
	return stats.objectSizeIndexHit;
}
/**
 * Get number of times the size of an object was not found in the object
 * size index. This usually means it was below the indexing threshold.
 * <p>
 * This counts only queries for blobs on packs with an object size index.
 *
 * @return count of object size index misses.
 */
public long getObjectSizeIndexMisses() {
	return stats.objectSizeIndexMiss;
}
}