From de7d06775c5cf071fb5a14b19d230f5e0845b9ef Mon Sep 17 00:00:00 2001 From: kylezhao Date: Wed, 14 Jul 2021 19:02:12 +0800 Subject: [PATCH] RevWalk: integrate commit-graph with commit parsing RevWalk#createCommit() will inspect the commit-graph file to find the specified object's graph position and then return a new RevCommitCG instance. RevCommitGC is a RevCommit with an additional "pointer" (the position) to the commit-graph, so it can load the headers and metadata from there instead of the pack. This saves IO access in walks where the body is not needed (i.e. #isRetainBody is false and #parseBody is not invoked). RevWalk uses automatically the commit-graph if available, no action needed from callers. The commit-graph is fetched on first access from the reader (that internally can keep it loaded and reuse it between walks). The startup cost of reading the entire commit graph is small. After testing, reading a commit-graph with 1 million commits takes less than 50ms. If we use RepositoryCache, it will not be initialized util the commit-graph is rewritten. Bug: 574368 Change-Id: I90d0f64af24f3acc3eae6da984eae302d338f5ee Signed-off-by: kylezhao --- .../jgit/revwalk/RevWalkCommitGraphTest.java | 319 ++++++++++++++++++ .../storage/commitgraph/CommitGraph.java | 27 ++ .../org/eclipse/jgit/revwalk/RevCommit.java | 25 +- .../org/eclipse/jgit/revwalk/RevCommitCG.java | 93 +++++ .../src/org/eclipse/jgit/revwalk/RevWalk.java | 51 +++ 5 files changed, 514 insertions(+), 1 deletion(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java new file mode 100644 index 000000000..05e023bb7 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java @@ -0,0 +1,319 @@ +/* + * Copyright (C) 2023, Tencent. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.revwalk; + +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraph.EMPTY; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.internal.storage.file.GC; +import org.eclipse.jgit.lib.ConfigConstants; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.revwalk.filter.MessageRevFilter; +import org.eclipse.jgit.revwalk.filter.RevFilter; +import org.eclipse.jgit.treewalk.filter.AndTreeFilter; +import org.eclipse.jgit.treewalk.filter.PathFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.junit.Test; + +public class RevWalkCommitGraphTest extends RevWalkTestCase { + + private RevWalk rw; + + @Override + public void setUp() throws Exception { + super.setUp(); + rw = new RevWalk(db); + } + + @Test + public void testParseHeaders() throws Exception { + RevCommit c1 = commitFile("file1", "1", "master"); + + RevCommit notParseInGraph = rw.lookupCommit(c1); + rw.parseHeaders(notParseInGraph); + assertFalse(notParseInGraph instanceof RevCommitCG); + assertNotNull(notParseInGraph.getRawBuffer()); + assertEquals(Constants.COMMIT_GENERATION_UNKNOWN, + notParseInGraph.getGeneration()); + + enableAndWriteCommitGraph(); + + reinitializeRevWalk(); + RevCommit parseInGraph = rw.lookupCommit(c1); + parseInGraph.parseHeaders(rw); + + assertTrue(parseInGraph instanceof RevCommitCG); + assertNotNull(parseInGraph.getRawBuffer()); + assertEquals(1, parseInGraph.getGeneration()); + assertEquals(notParseInGraph.getId(), parseInGraph.getId()); + assertEquals(notParseInGraph.getTree(), parseInGraph.getTree()); + assertEquals(notParseInGraph.getCommitTime(), parseInGraph.getCommitTime()); + assertArrayEquals(notParseInGraph.getParents(), parseInGraph.getParents()); + + reinitializeRevWalk(); + rw.setRetainBody(false); + RevCommit noBody = rw.lookupCommit(c1); + noBody.parseHeaders(rw); + + assertTrue(noBody instanceof RevCommitCG); + assertNull(noBody.getRawBuffer()); + assertEquals(1, noBody.getGeneration()); + assertEquals(notParseInGraph.getId(), noBody.getId()); + assertEquals(notParseInGraph.getTree(), noBody.getTree()); + assertEquals(notParseInGraph.getCommitTime(), noBody.getCommitTime()); + assertArrayEquals(notParseInGraph.getParents(), noBody.getParents()); + } + + @Test + public void testInitializeShallowCommits() throws Exception { + RevCommit c1 = commit(commit()); + branch(c1, "master"); + enableAndWriteCommitGraph(); + assertCommitCntInGraph(2); + + db.getObjectDatabase().setShallowCommits(Collections.singleton(c1)); + RevCommit parseInGraph = rw.lookupCommit(c1); + parseInGraph.parseHeaders(rw); + + assertTrue(parseInGraph instanceof RevCommitCG); + assertNotNull(parseInGraph.getRawBuffer()); + assertEquals(2, parseInGraph.getGeneration()); + assertEquals(0, parseInGraph.getParentCount()); + } + + @Test + public void testTreeFilter() throws Exception { + RevCommit c1 = commitFile("file1", "1", "master"); + RevCommit c2 = commitFile("file2", "2", "master"); + RevCommit c3 = commitFile("file1", "3", "master"); + RevCommit c4 = commitFile("file2", "4", "master"); + + enableAndWriteCommitGraph(); + assertCommitCntInGraph(4); + + rw.markStart(rw.lookupCommit(c4)); + rw.setTreeFilter(AndTreeFilter.create(PathFilter.create("file1"), + TreeFilter.ANY_DIFF)); + assertEquals(c3, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + reinitializeRevWalk(); + rw.markStart(rw.lookupCommit(c4)); + rw.setTreeFilter(AndTreeFilter.create(PathFilter.create("file2"), + TreeFilter.ANY_DIFF)); + assertEquals(c4, rw.next()); + assertEquals(c2, rw.next()); + assertNull(rw.next()); + } + + @Test + public void testWalkWithCommitMessageFilter() throws Exception { + RevCommit a = commit(); + RevCommit b = commitBuilder().parent(a) + .message("The quick brown fox jumps over the lazy dog!") + .create(); + RevCommit c = commitBuilder().parent(b).message("commit-c").create(); + branch(c, "master"); + + enableAndWriteCommitGraph(); + assertCommitCntInGraph(3); + + rw.setRevFilter(MessageRevFilter.create("quick brown fox jumps")); + rw.markStart(rw.lookupCommit(c)); + assertEquals(b, rw.next()); + assertNull(rw.next()); + } + + @Test + public void testCommitsWalk() throws Exception { + RevCommit c1 = commit(); + branch(c1, "commits/1"); + RevCommit c2 = commit(c1); + branch(c2, "commits/2"); + RevCommit c3 = commit(c2); + branch(c3, "commits/3"); + + enableAndWriteCommitGraph(); + assertCommitCntInGraph(3); + testRevWalkBehavior("commits/1", "commits/3"); + + // add more commits + RevCommit c4 = commit(c1); + RevCommit c5 = commit(c4); + RevCommit c6 = commit(c1); + RevCommit c7 = commit(c6); + + RevCommit m1 = commit(c2, c4); + branch(m1, "merge/1"); + RevCommit m2 = commit(c4, c6); + branch(m2, "merge/2"); + RevCommit m3 = commit(c3, c5, c7); + branch(m3, "merge/3"); + + /* + *
+		 * current graph structure:
+		 *
+		 *    __M3___
+		 *   /   |   \
+		 *  3 M1 5 M2 7
+		 *  |/  \|/  \|
+		 *  2    4    6
+		 *  |___/____/
+		 *  1
+		 * 
+ */ + enableAndWriteCommitGraph(); + reinitializeRevWalk(); + assertCommitCntInGraph(10); + testRevWalkBehavior("merge/1", "merge/2"); + testRevWalkBehavior("merge/1", "merge/3"); + testRevWalkBehavior("merge/2", "merge/3"); + + // add one more commit + RevCommit c8 = commit(m3); + branch(c8, "commits/8"); + + /* + *
+		 * current graph structure:
+		 *       8
+		 *       |
+		 *    __M3___
+		 *   /   |   \
+		 *  3 M1 5 M2 7
+		 *  |/  \|/  \|
+		 *  2    4    6
+		 *  |___/____/
+		 *  1
+		 * 
+ */ + testRevWalkBehavior("commits/8", "merge/1"); + testRevWalkBehavior("commits/8", "merge/2"); + + enableAndWriteCommitGraph(); + reinitializeRevWalk(); + assertCommitCntInGraph(11); + testRevWalkBehavior("commits/8", "merge/1"); + testRevWalkBehavior("commits/8", "merge/2"); + } + + void testRevWalkBehavior(String branch, String compare) throws Exception { + assertCommits( + travel(TreeFilter.ALL, RevFilter.MERGE_BASE, RevSort.NONE, true, + branch, compare), + travel(TreeFilter.ALL, RevFilter.MERGE_BASE, RevSort.NONE, + false, branch, compare)); + + assertCommits( + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, true, + branch), + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, false, + branch)); + + assertCommits( + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, true, + compare), + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, false, + compare)); + + assertCommits( + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC, + true, branch), + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC, + false, branch)); + + assertCommits( + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC, + true, compare), + travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC, + false, compare)); + } + + void assertCommitCntInGraph(int expect) { + assertEquals(expect, rw.commitGraph().getCommitCnt()); + } + + void assertCommits(List expect, List actual) { + assertEquals(expect.size(), actual.size()); + + for (int i = 0; i < expect.size(); i++) { + RevCommit c1 = expect.get(i); + RevCommit c2 = actual.get(i); + + assertEquals(c1.getId(), c2.getId()); + assertEquals(c1.getTree(), c2.getTree()); + assertEquals(c1.getCommitTime(), c2.getCommitTime()); + assertArrayEquals(c1.getParents(), c2.getParents()); + assertArrayEquals(c1.getRawBuffer(), c2.getRawBuffer()); + } + } + + Ref branch(RevCommit commit, String name) throws Exception { + return Git.wrap(db).branchCreate().setName(name) + .setStartPoint(commit.name()).call(); + } + + List travel(TreeFilter treeFilter, RevFilter revFilter, + RevSort revSort, boolean enableCommitGraph, String... starts) + throws Exception { + db.getConfig().setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_COMMIT_GRAPH, enableCommitGraph); + + try (RevWalk walk = new RevWalk(db)) { + walk.setTreeFilter(treeFilter); + walk.setRevFilter(revFilter); + walk.sort(revSort); + walk.setRetainBody(false); + for (String start : starts) { + walk.markStart(walk.lookupCommit(db.resolve(start))); + } + List commits = new ArrayList<>(); + + if (enableCommitGraph) { + assertTrue(walk.commitGraph().getCommitCnt() > 0); + } else { + assertEquals(EMPTY, walk.commitGraph()); + } + + for (RevCommit commit : walk) { + commits.add(commit); + } + return commits; + } + } + + void enableAndWriteCommitGraph() throws Exception { + db.getConfig().setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_COMMIT_GRAPH, true); + db.getConfig().setBoolean(ConfigConstants.CONFIG_GC_SECTION, null, + ConfigConstants.CONFIG_KEY_WRITE_COMMIT_GRAPH, true); + GC gc = new GC(db); + gc.gc(); + } + + private void reinitializeRevWalk() { + rw.close(); + rw = new RevWalk(db); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java index 162e0e2cb..0796293f5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java @@ -30,6 +30,33 @@ */ public interface CommitGraph { + /** Empty {@link CommitGraph} with no results. */ + CommitGraph EMPTY = new CommitGraph() { + /** {@inheritDoc} */ + @Override + public int findGraphPosition(AnyObjectId commit) { + return -1; + } + + /** {@inheritDoc} */ + @Override + public CommitData getCommitData(int graphPos) { + return null; + } + + /** {@inheritDoc} */ + @Override + public ObjectId getObjectId(int graphPos) { + return null; + } + + /** {@inheritDoc} */ + @Override + public long getCommitCnt() { + return 0; + } + }; + /** * Find the position in the commit-graph of the commit. *

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java index 6b644cef9..e155a2563 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java @@ -105,7 +105,12 @@ public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException { static final RevCommit[] NO_PARENTS = {}; - private RevTree tree; + /** + * Tree reference of the commit. + * + * @since 6.5 + */ + protected RevTree tree; /** * Avoid accessing this field directly. Use method @@ -656,6 +661,24 @@ public final List getFooterLines(FooterKey keyName) { return r; } + /** + * Get the distance of the commit from the root, as defined in + * {@link org.eclipse.jgit.internal.storage.commitgraph.CommitGraph} + *

+ * Generation number is + * {@link org.eclipse.jgit.lib.Constants#COMMIT_GENERATION_UNKNOWN} when the + * commit is not in the commit-graph. If a commit-graph file was written by + * a version of Git that did not compute generation numbers, then those + * commits in commit-graph will have generation number represented by + * {@link org.eclipse.jgit.lib.Constants#COMMIT_GENERATION_NOT_COMPUTED}. + * + * @return the generation number + * @since 6.5 + */ + int getGeneration() { + return Constants.COMMIT_GENERATION_UNKNOWN; + } + /** * Reset this commit to allow another RevWalk with the same instances. *

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java new file mode 100644 index 000000000..b68f65b68 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2023, Tencent. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.revwalk; + +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; + +/** + * RevCommit parsed from + * {@link org.eclipse.jgit.internal.storage.commitgraph.CommitGraph}. + * + * @since 6.5 + */ +class RevCommitCG extends RevCommit { + + private final int graphPosition; + + private int generation = Constants.COMMIT_GENERATION_UNKNOWN; + + /** + * Create a new commit reference. + * + * @param id + * object name for the commit. + * @param graphPosition + * the position in the commit-graph of the object. + */ + protected RevCommitCG(AnyObjectId id, int graphPosition) { + super(id); + this.graphPosition = graphPosition; + } + + /** {@inheritDoc} */ + @Override + void parseHeaders(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + CommitGraph graph = walk.commitGraph(); + CommitGraph.CommitData data = graph.getCommitData(graphPosition); + if (data == null) { + // RevCommitCG was created because we got its graphPosition from + // commit-graph. If now the commit-graph doesn't know about it, + // something went wrong. + throw new IllegalStateException(); + } + if (!walk.shallowCommitsInitialized) { + walk.initializeShallowCommits(this); + } + + this.tree = walk.lookupTree(data.getTree()); + this.commitTime = (int) data.getCommitTime(); + this.generation = data.getGeneration(); + + if (getParents() == null) { + int[] pGraphList = data.getParents(); + if (pGraphList.length == 0) { + this.parents = RevCommit.NO_PARENTS; + } else { + RevCommit[] pList = new RevCommit[pGraphList.length]; + for (int i = 0; i < pList.length; i++) { + int graphPos = pGraphList[i]; + ObjectId objId = graph.getObjectId(graphPos); + pList[i] = walk.lookupCommit(objId, graphPos); + } + this.parents = pList; + } + } + + flags |= PARSED; + if (walk.isRetainBody()) { + super.parseBody(walk); + } + } + + /** {@inheritDoc} */ + @Override + int getGeneration() { + return generation; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java index 8da36c524..a66e7c86b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java @@ -12,6 +12,8 @@ package org.eclipse.jgit.revwalk; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraph.EMPTY; + import java.io.IOException; import java.text.MessageFormat; import java.util.ArrayList; @@ -30,6 +32,7 @@ import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.AsyncObjectLoaderQueue; +import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.MutableObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; @@ -189,6 +192,8 @@ public class RevWalk implements Iterable, AutoCloseable { private TreeFilter treeFilter; + private CommitGraph commitGraph; + private boolean retainBody = true; private boolean rewriteParents = true; @@ -237,6 +242,7 @@ public RevWalk(ObjectReader or) { filter = RevFilter.ALL; treeFilter = TreeFilter.ALL; this.closeReader = closeReader; + commitGraph = null; } /** @@ -899,6 +905,29 @@ public RevCommit lookupCommit(AnyObjectId id) { return c; } + /** + * This method is intended to be invoked only by {@link RevCommitCG}, in + * order to give commit the correct graphPosition before accessing the + * commit-graph. In this way, the headers of the commit can be obtained in + * constant time. + * + * @param id + * name of the commit object. + * @param graphPos + * the position in the commit-graph of the object. + * @return reference to the commit object. Never null. + * @since 6.5 + */ + @NonNull + protected RevCommit lookupCommit(AnyObjectId id, int graphPos) { + RevCommit c = (RevCommit) objects.get(id); + if (c == null) { + c = createCommit(id, graphPos); + objects.add(c); + } + return c; + } + /** * Locate a reference to a tag without loading it. *

@@ -1135,6 +1164,21 @@ byte[] getCachedBytes(RevObject obj, ObjectLoader ldr) } } + /** + * Get the commit-graph. + * + * @return the commit-graph. Never null. + * @since 6.5 + */ + @NonNull + CommitGraph commitGraph() { + if (commitGraph == null) { + commitGraph = reader != null ? reader.getCommitGraph().orElse(EMPTY) + : EMPTY; + } + return commitGraph; + } + /** * Asynchronous object parsing. * @@ -1650,6 +1694,13 @@ public ObjectWalk toObjectWalkWithSameObjects() { * @return a new unparsed reference for the object. */ protected RevCommit createCommit(AnyObjectId id) { + return createCommit(id, commitGraph().findGraphPosition(id)); + } + + private RevCommit createCommit(AnyObjectId id, int graphPos) { + if (graphPos >= 0) { + return new RevCommitCG(id, graphPos); + } return new RevCommit(id); }