DfsPackFile/DfsGC: Write commit graphs and expose in pack

JGit knows how to read and write commit graphs, but the DFS stack is not
using them yet.

The DFS garbage collector generates a commit-graph with the commits
reachable from any ref. The commit-graph is stored as an extra stream in
the GC pack. DfsPackFile mimics how other indices are loaded, storing the
reference in the DFS cache.

Signed-off-by: Xing Huang <xingkhuang@google.com>
Change-Id: I3f94997377986d21a56b300d8358dd27be37f5de
This commit is contained in:
Xing Huang 2023-02-06 14:18:59 -06:00 committed by Ivan Frade
parent eccae7cf0b
commit df5b7959be
7 changed files with 282 additions and 1 deletions

View File

@ -284,12 +284,14 @@ public void noConcurrencySerializedReads_oneRepo() throws Exception {
asyncRun(() -> pack.getBitmapIndex(reader));
asyncRun(() -> pack.getPackIndex(reader));
asyncRun(() -> pack.getBitmapIndex(reader));
asyncRun(() -> pack.getCommitGraph(reader));
}
waitForExecutorPoolTermination();
assertEquals(1, cache.getMissCount()[PackExt.BITMAP_INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.REVERSE_INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.COMMIT_GRAPH.ordinal()]);
}
@SuppressWarnings("resource")
@ -313,12 +315,15 @@ public void noConcurrencySerializedReads_twoRepos() throws Exception {
}
asyncRun(() -> pack1.getBitmapIndex(reader));
asyncRun(() -> pack2.getBitmapIndex(reader));
asyncRun(() -> pack1.getCommitGraph(reader));
asyncRun(() -> pack2.getCommitGraph(reader));
}
waitForExecutorPoolTermination();
assertEquals(2, cache.getMissCount()[PackExt.BITMAP_INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.REVERSE_INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.COMMIT_GRAPH.ordinal()]);
}
@SuppressWarnings("resource")
@ -342,12 +347,15 @@ public void lowConcurrencyParallelReads_twoRepos() throws Exception {
}
asyncRun(() -> pack1.getBitmapIndex(reader));
asyncRun(() -> pack2.getBitmapIndex(reader));
asyncRun(() -> pack1.getCommitGraph(reader));
asyncRun(() -> pack2.getCommitGraph(reader));
}
waitForExecutorPoolTermination();
assertEquals(2, cache.getMissCount()[PackExt.BITMAP_INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.REVERSE_INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.COMMIT_GRAPH.ordinal()]);
}
@SuppressWarnings("resource")
@ -372,7 +380,9 @@ public void lowConcurrencyParallelReads_twoReposAndIndex()
}
asyncRun(() -> pack1.getBitmapIndex(reader));
asyncRun(() -> pack1.getPackIndex(reader));
asyncRun(() -> pack1.getCommitGraph(reader));
asyncRun(() -> pack2.getBitmapIndex(reader));
asyncRun(() -> pack2.getCommitGraph(reader));
}
waitForExecutorPoolTermination();
@ -380,6 +390,7 @@ public void lowConcurrencyParallelReads_twoReposAndIndex()
// Index is loaded once for each repo.
assertEquals(2, cache.getMissCount()[PackExt.INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.REVERSE_INDEX.ordinal()]);
assertEquals(2, cache.getMissCount()[PackExt.COMMIT_GRAPH.ordinal()]);
}
@Test
@ -396,12 +407,14 @@ public void highConcurrencyParallelReads_oneRepo() throws Exception {
asyncRun(() -> pack.getBitmapIndex(reader));
asyncRun(() -> pack.getPackIndex(reader));
asyncRun(() -> pack.getBitmapIndex(reader));
asyncRun(() -> pack.getCommitGraph(reader));
}
waitForExecutorPoolTermination();
assertEquals(1, cache.getMissCount()[PackExt.BITMAP_INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.REVERSE_INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.COMMIT_GRAPH.ordinal()]);
}
@Test
@ -420,12 +433,14 @@ public void highConcurrencyParallelReads_oneRepoParallelReverseIndex()
asyncRun(() -> pack.getBitmapIndex(reader));
asyncRun(() -> pack.getPackIndex(reader));
asyncRun(() -> pack.getBitmapIndex(reader));
asyncRun(() -> pack.getCommitGraph(reader));
}
waitForExecutorPoolTermination();
assertEquals(1, cache.getMissCount()[PackExt.BITMAP_INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.REVERSE_INDEX.ordinal()]);
assertEquals(1, cache.getMissCount()[PackExt.COMMIT_GRAPH.ordinal()]);
}
private void resetCache() {
@ -450,7 +465,7 @@ private InMemoryRepository createRepoWithBitmap(String repoName)
repository.branch("/refs/ref2" + repoName).commit()
.add("blob2", "blob2" + repoName).parent(commit).create();
}
new DfsGarbageCollector(repo).pack(null);
new DfsGarbageCollector(repo).setWriteCommitGraph(true).pack(null);
return repo;
}

View File

@ -18,6 +18,7 @@
import java.util.Collections;
import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.reftable.RefCursor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
@ -976,10 +977,139 @@ public void reftableWithTombstoneNotResurrected() throws Exception {
assertNull(refdb.exactRef(NEXT));
}
/**
 * After a GC with commit-graph writing enabled, the GC pack exposes a
 * commit graph that contains the commits behind the tag, the head and the
 * non-head ref created by this test.
 */
@Test
public void produceCommitGraphAllRefsIncludedFromDisk() throws Exception {
	String tag = "refs/tags/tag1";
	String head = "refs/heads/head1";
	String nonHead = "refs/something/nonHead";

	RevCommit rootCommitTagged = git.branch(tag).commit().message("0")
			.noParents().create();
	RevCommit headTip = git.branch(head).commit().message("1")
			.parent(rootCommitTagged).create();
	RevCommit nonHeadTip = git.branch(nonHead).commit().message("2")
			.parent(rootCommitTagged).create();

	gcWithCommitGraph();

	assertEquals(2, odb.getPacks().length);
	DfsPackFile gcPack = odb.getPacks()[0];
	assertEquals(GC, gcPack.getPackDescription().getPackSource());

	// Close the reader when done instead of leaking it.
	try (DfsReader reader = odb.newReader()) {
		CommitGraph cg = gcPack.getCommitGraph(reader);
		assertNotNull(cg);
		// assertEquals reports the actual count on failure.
		assertEquals("all commits in commit graph", 3, cg.getCommitCnt());

		// GC packed
		assertTrue("tag referenced commit is in graph",
				cg.findGraphPosition(rootCommitTagged) != -1);
		assertTrue("head referenced commit is in graph",
				cg.findGraphPosition(headTip) != -1);
		// GC_REST packed
		assertTrue("nonHead referenced commit is in graph",
				cg.findGraphPosition(nonHeadTip) != -1);
	}
}
/**
 * Requesting the commit graph twice through the same reader must read it
 * from the backing store only once; the second request is expected to be
 * served from the cache and to return the same content.
 */
@Test
public void produceCommitGraphAllRefsIncludedFromCache() throws Exception {
	String tag = "refs/tags/tag1";
	String head = "refs/heads/head1";
	String nonHead = "refs/something/nonHead";

	RevCommit rootCommitTagged = git.branch(tag).commit().message("0")
			.noParents().create();
	RevCommit headTip = git.branch(head).commit().message("1")
			.parent(rootCommitTagged).create();
	RevCommit nonHeadTip = git.branch(nonHead).commit().message("2")
			.parent(rootCommitTagged).create();

	gcWithCommitGraph();

	assertEquals(2, odb.getPacks().length);
	DfsPackFile gcPack = odb.getPacks()[0];
	assertEquals(GC, gcPack.getPackDescription().getPackSource());

	// Close the reader when done instead of leaking it.
	try (DfsReader reader = odb.newReader()) {
		gcPack.getCommitGraph(reader);
		// Invoke cache hit
		CommitGraph cachedCG = gcPack.getCommitGraph(reader);
		assertNotNull(cachedCG);
		// assertEquals reports the actual value on failure.
		assertEquals("commit graph have been read from disk once", 1,
				reader.stats.readCommitGraph);
		assertTrue("commit graph read contains content",
				reader.stats.readCommitGraphBytes > 0);
		assertTrue("commit graph read time is recorded",
				reader.stats.readCommitGraphMicros > 0);
		assertEquals("all commits in commit graph", 3,
				cachedCG.getCommitCnt());

		// GC packed
		assertTrue("tag referenced commit is in graph",
				cachedCG.findGraphPosition(rootCommitTagged) != -1);
		assertTrue("head referenced commit is in graph",
				cachedCG.findGraphPosition(headTip) != -1);
		// GC_REST packed
		assertTrue("nonHead referenced commit is in graph",
				cachedCG.findGraphPosition(nonHeadTip) != -1);
	}
}
/**
 * When only a non-head ref and an unreferenced commit exist, none of the
 * packs produced by GC is expected to expose a commit graph.
 */
@Test
public void noCommitGraphWithoutGcPack() throws Exception {
	String nonHead = "refs/something/nonHead";
	RevCommit nonHeadCommit = git.branch(nonHead).commit()
			.message("nonhead").noParents().create();
	commit().message("unreachable").parent(nonHeadCommit).create();

	gcWithCommitGraph();

	assertEquals(2, odb.getPacks().length);
	for (DfsPackFile pack : odb.getPacks()) {
		// One reader per pack, closed right after use (was leaked before).
		try (DfsReader reader = odb.newReader()) {
			assertNull(pack.getCommitGraph(reader));
		}
	}
}
/**
 * With one head ref and one unreferenced commit, the GC pack must expose a
 * commit graph holding only the head commit, and the UNREACHABLE_GARBAGE
 * pack must expose none.
 */
@Test
public void commitGraphWithoutGCrestPack() throws Exception {
	String head = "refs/heads/head1";
	RevCommit headCommit = git.branch(head).commit().message("head")
			.noParents().create();
	RevCommit unreachableCommit = commit().message("unreachable")
			.parent(headCommit).create();

	gcWithCommitGraph();

	assertEquals(2, odb.getPacks().length);
	for (DfsPackFile pack : odb.getPacks()) {
		DfsPackDescription d = pack.getPackDescription();
		if (d.getPackSource() == GC) {
			// Close the reader when done instead of leaking it.
			try (DfsReader reader = odb.newReader()) {
				CommitGraph cg = pack.getCommitGraph(reader);
				assertNotNull(cg);
				// assertEquals reports the actual count on failure.
				assertEquals("commit graph only contains 1 commit", 1,
						cg.getCommitCnt());
				assertTrue("head exists in commit graph",
						cg.findGraphPosition(headCommit) != -1);
				assertTrue(
						"unreachable commit does not exist in commit graph",
						cg.findGraphPosition(unreachableCommit) == -1);
			}
		} else if (d.getPackSource() == UNREACHABLE_GARBAGE) {
			try (DfsReader reader = odb.newReader()) {
				assertNull(pack.getCommitGraph(reader));
			}
		} else {
			// fail() throws, so no break is needed after it.
			fail("unexpected " + d.getPackSource());
		}
	}
}
/**
 * Shorthand for {@code git.commit()}.
 *
 * @return the commit builder returned by the test repository helper.
 */
private TestRepository<InMemoryRepository>.CommitBuilder commit() {
return git.commit();
}
/**
 * Runs a garbage collection over {@code repo} with commit-graph writing
 * switched on.
 */
private void gcWithCommitGraph() throws IOException {
	// setWriteCommitGraph returns the collector itself, so it can be chained.
	run(new DfsGarbageCollector(repo).setWriteCommitGraph(true));
}
private void gcNoTtl() throws IOException {
DfsGarbageCollector gc = new DfsGarbageCollector(repo);
gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL

View File

@ -1,4 +1,5 @@
cannotReadIndex=Cannot read index {0}
cannotReadCommitGraph=Cannot read commit graph {0}
shortReadOfBlock=Short read of block at {0} in pack {1}; expected {2} bytes, received only {3}
shortReadOfIndex=Short read of index {0}
willNotStoreEmptyPack=Cannot store empty pack

View File

@ -18,6 +18,7 @@
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.dfs.DfsPackCompactor.configureReftable;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.COMMIT_GRAPH;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
@ -34,8 +35,11 @@
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraphWriter;
import org.eclipse.jgit.internal.storage.commitgraph.GraphCommits;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
@ -75,6 +79,7 @@ public class DfsGarbageCollector {
private PackConfig packConfig;
private ReftableConfig reftableConfig;
private boolean convertToReftable = true;
private boolean writeCommitGraph;
private boolean includeDeletes;
private long reftableInitialMinUpdateIndex = 1;
private long reftableInitialMaxUpdateIndex = 1;
@ -278,6 +283,20 @@ public DfsGarbageCollector setGarbageTtl(long ttl, TimeUnit unit) {
return this;
}
/**
 * Enable or disable writing a commit graph as part of garbage collection.
 * <p>
 * Commit graph generation is disabled unless it is switched on here.
 *
 * @param enable
 *            {@code true} to generate a commit graph, {@code false} to
 *            skip it.
 * @return {@code this}
 */
public DfsGarbageCollector setWriteCommitGraph(boolean enable) {
	this.writeCommitGraph = enable;
	return this;
}
/**
* Create a single new pack file containing all of the live objects.
* <p>
@ -642,6 +661,10 @@ private DfsPackDescription writePack(PackSource source, PackWriter pw,
writeReftable(pack);
}
if (source == GC) {
writeCommitGraph(pack, pm);
}
try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
pw.writePack(pm, pm, out);
pack.addFileExt(PACK);
@ -724,4 +747,25 @@ private void writeReftable(DfsPackDescription pack, Collection<Ref> refs)
pack.setReftableStats(writer.getStats());
}
}
/**
 * Write a commit graph built from the tips of {@code refsBefore} and attach
 * it to the given pack description as a {@code COMMIT_GRAPH} stream.
 * <p>
 * Does nothing when commit-graph writing is disabled or when the object
 * database reports shallow commits.
 *
 * @param pack
 *            description of the pack that receives the commit graph stream;
 *            its file extension, file size and block size for
 *            {@code COMMIT_GRAPH} are updated.
 * @param pm
 *            progress monitor handed to the graph walk and the writer.
 * @throws IOException
 *             the commits cannot be walked or the stream cannot be written.
 */
private void writeCommitGraph(DfsPackDescription pack, ProgressMonitor pm)
		throws IOException {
	if (!writeCommitGraph || !objdb.getShallowCommits().isEmpty()) {
		return;
	}

	// A ref may carry no object id (Ref#getObjectId can return null), and
	// Collectors.toUnmodifiableSet() throws NullPointerException on null
	// elements, so drop such refs before collecting the tips.
	Set<ObjectId> allTips = refsBefore.stream().map(Ref::getObjectId)
			.filter(id -> id != null)
			.collect(Collectors.toUnmodifiableSet());

	try (DfsOutputStream out = objdb.writeFile(pack, COMMIT_GRAPH);
			RevWalk pool = new RevWalk(ctx)) {
		GraphCommits gcs = GraphCommits.fromWalk(pm, allTips, pool);
		// Count the bytes written so the stream size can be recorded.
		CountingOutputStream cnt = new CountingOutputStream(out);
		CommitGraphWriter writer = new CommitGraphWriter(gcs);
		writer.write(pm, cnt);
		pack.addFileExt(COMMIT_GRAPH);
		pack.setFileSize(COMMIT_GRAPH, cnt.getCount());
		pack.setBlockSize(COMMIT_GRAPH, out.blockSize());
	}
}
}

View File

@ -14,6 +14,7 @@
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.COMMIT_GRAPH;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REVERSE_INDEX;
@ -37,6 +38,8 @@
import org.eclipse.jgit.errors.PackInvalidException;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraphLoader;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
@ -69,6 +72,9 @@ public final class DfsPackFile extends BlockBasedFile {
/** Index of compressed bitmap mapping entire object graph. */
private volatile PackBitmapIndex bitmapIndex;
/** Commit graph stored with this pack, if any; set on first successful load. */
private volatile CommitGraph commitGraph;
/**
* Objects we have tried to read, and discovered to be corrupt.
* <p>
@ -215,6 +221,43 @@ public PackBitmapIndex getBitmapIndex(DfsReader ctx) throws IOException {
return bitmapIndex;
}
/**
 * Obtain the commit graph stored with this pack file.
 *
 * @param ctx
 *            reader context used to read from the backing store when the
 *            commit graph is not already held in memory.
 * @return {@link org.eclipse.jgit.internal.storage.commitgraph.CommitGraph}
 *         of this pack, or {@code null} if the pack is invalid, is
 *         garbage, or has no commit graph stream.
 * @throws java.io.IOException
 *             the commit graph is not available, or is corrupt.
 */
public CommitGraph getCommitGraph(DfsReader ctx) throws IOException {
	boolean unavailable = invalid || isGarbage()
			|| !desc.hasFileExt(COMMIT_GRAPH);
	if (unavailable) {
		return null;
	}

	// Fast path: a previous call already stored the graph on this pack.
	if (commitGraph != null) {
		return commitGraph;
	}

	DfsStreamKey key = desc.getStreamKey(COMMIT_GRAPH);
	// Flipped by the loader callback, i.e. only when the cache had to load.
	AtomicBoolean loaded = new AtomicBoolean(false);
	DfsBlockCache.Ref<CommitGraph> ref = cache.getOrLoadRef(key,
			REF_POSITION, () -> {
				loaded.set(true);
				return loadCommitGraph(ctx, key);
			});
	if (!loaded.get()) {
		ctx.stats.commitGraphCacheHit++;
	}

	CommitGraph fromCache = ref.get();
	if (fromCache != null && commitGraph == null) {
		commitGraph = fromCache;
	}
	return commitGraph;
}
PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException {
if (reverseIndex != null) {
return reverseIndex;
@ -1081,4 +1124,37 @@ private DfsBlockCache.Ref<PackBitmapIndex> loadBitmapIndex(DfsReader ctx,
desc.getFileName(BITMAP_INDEX)), e);
}
}
/**
 * Read the commit graph stream of this pack from the backing store and wrap
 * it in a cache reference.
 * <p>
 * Updates the reader statistics (read count, bytes read, elapsed time) and
 * stores the loaded graph on this pack.
 *
 * @param ctx
 *            reader whose database is used to open the stream and whose
 *            stats are updated.
 * @param cgkey
 *            stream key under which the returned reference is registered.
 * @return cache reference holding the loaded commit graph.
 * @throws IOException
 *             the stream cannot be opened or read; the original failure is
 *             attached as the cause.
 */
private DfsBlockCache.Ref<CommitGraph> loadCommitGraph(DfsReader ctx,
		DfsStreamKey cgkey) throws IOException {
	ctx.stats.readCommitGraph++;
	long startNanos = System.nanoTime();
	try (ReadableChannel channel = ctx.db.openFile(desc, COMMIT_GRAPH)) {
		long bytesRead;
		CommitGraph graph;
		try {
			InputStream raw = Channels.newInputStream(channel);
			int wantSize = 8192;
			int blockSize = channel.blockSize();
			int bufferSize;
			if (blockSize <= 0) {
				// No usable block size reported: use the default.
				bufferSize = wantSize;
			} else if (blockSize < wantSize) {
				// Largest multiple of the block size not above wantSize.
				bufferSize = (wantSize / blockSize) * blockSize;
			} else {
				bufferSize = blockSize;
			}
			graph = CommitGraphLoader
					.read(new BufferedInputStream(raw, bufferSize));
		} finally {
			// Record stats whether or not the read succeeded.
			bytesRead = channel.position();
			ctx.stats.readCommitGraphBytes += bytesRead;
			ctx.stats.readCommitGraphMicros += elapsedMicros(startNanos);
		}
		commitGraph = graph;
		return new DfsBlockCache.Ref<>(cgkey, REF_POSITION, bytesRead,
				graph);
	} catch (IOException cause) {
		String msg = MessageFormat.format(
				DfsText.get().cannotReadCommitGraph,
				desc.getFileName(COMMIT_GRAPH));
		throw new IOException(msg, cause);
	}
}
}

View File

@ -23,6 +23,7 @@
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
@ -31,6 +32,7 @@
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackList;
import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
@ -121,6 +123,18 @@ public BitmapIndex getBitmapIndex() throws IOException {
return null;
}
/** {@inheritDoc} */
@Override
public Optional<CommitGraph> getCommitGraph() throws IOException {
	// Return the commit graph of the first pack, in db.getPacks() order,
	// that has one.
	for (DfsPackFile candidate : db.getPacks()) {
		CommitGraph graph = candidate.getCommitGraph(this);
		if (graph == null) {
			continue;
		}
		return Optional.of(graph);
	}
	return Optional.empty();
}
/** {@inheritDoc} */
@Override
public Collection<CachedPack> getCachedPacksAndUpdate(

View File

@ -28,6 +28,7 @@ public static DfsText get() {
// @formatter:off
/***/ public String cannotReadIndex;
/***/ public String cannotReadCommitGraph;
/***/ public String shortReadOfBlock;
/***/ public String shortReadOfIndex;
/***/ public String willNotStoreEmptyPack;