RevWalk: integrate commit-graph with commit parsing

RevWalk#createCommit() will inspect the commit-graph file to find the
specified object's graph position and then return a new RevCommitCG
instance.

RevCommitGC is a RevCommit with an additional "pointer" (the position)
to the commit-graph, so it can load the headers and metadata from there
instead of the pack. This saves IO access in walks where the body is not
needed (i.e. #isRetainBody is false and #parseBody is not invoked).

RevWalk uses automatically the commit-graph if available, no action
needed from callers. The commit-graph is fetched on first access from
the reader (that internally can keep it loaded and reuse it between
walks).

The startup cost of reading the entire commit graph is small. After
testing, reading a commit-graph with 1 million commits takes less than
50ms. If we use RepositoryCache, it will not be initialized util the
commit-graph is rewritten.

Bug: 574368
Change-Id: I90d0f64af24f3acc3eae6da984eae302d338f5ee
Signed-off-by: kylezhao <kylezhao@tencent.com>
This commit is contained in:
kylezhao 2021-07-14 19:02:12 +08:00
parent 801a56b48a
commit de7d06775c
5 changed files with 514 additions and 1 deletions

View File

@ -0,0 +1,319 @@
/*
* Copyright (C) 2023, Tencent.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.revwalk;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraph.EMPTY;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.internal.storage.file.GC;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.revwalk.filter.MessageRevFilter;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.PathFilter;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.junit.Test;
public class RevWalkCommitGraphTest extends RevWalkTestCase {
private RevWalk rw;
@Override
public void setUp() throws Exception {
super.setUp();
rw = new RevWalk(db);
}
@Test
public void testParseHeaders() throws Exception {
RevCommit c1 = commitFile("file1", "1", "master");
RevCommit notParseInGraph = rw.lookupCommit(c1);
rw.parseHeaders(notParseInGraph);
assertFalse(notParseInGraph instanceof RevCommitCG);
assertNotNull(notParseInGraph.getRawBuffer());
assertEquals(Constants.COMMIT_GENERATION_UNKNOWN,
notParseInGraph.getGeneration());
enableAndWriteCommitGraph();
reinitializeRevWalk();
RevCommit parseInGraph = rw.lookupCommit(c1);
parseInGraph.parseHeaders(rw);
assertTrue(parseInGraph instanceof RevCommitCG);
assertNotNull(parseInGraph.getRawBuffer());
assertEquals(1, parseInGraph.getGeneration());
assertEquals(notParseInGraph.getId(), parseInGraph.getId());
assertEquals(notParseInGraph.getTree(), parseInGraph.getTree());
assertEquals(notParseInGraph.getCommitTime(), parseInGraph.getCommitTime());
assertArrayEquals(notParseInGraph.getParents(), parseInGraph.getParents());
reinitializeRevWalk();
rw.setRetainBody(false);
RevCommit noBody = rw.lookupCommit(c1);
noBody.parseHeaders(rw);
assertTrue(noBody instanceof RevCommitCG);
assertNull(noBody.getRawBuffer());
assertEquals(1, noBody.getGeneration());
assertEquals(notParseInGraph.getId(), noBody.getId());
assertEquals(notParseInGraph.getTree(), noBody.getTree());
assertEquals(notParseInGraph.getCommitTime(), noBody.getCommitTime());
assertArrayEquals(notParseInGraph.getParents(), noBody.getParents());
}
@Test
public void testInitializeShallowCommits() throws Exception {
RevCommit c1 = commit(commit());
branch(c1, "master");
enableAndWriteCommitGraph();
assertCommitCntInGraph(2);
db.getObjectDatabase().setShallowCommits(Collections.singleton(c1));
RevCommit parseInGraph = rw.lookupCommit(c1);
parseInGraph.parseHeaders(rw);
assertTrue(parseInGraph instanceof RevCommitCG);
assertNotNull(parseInGraph.getRawBuffer());
assertEquals(2, parseInGraph.getGeneration());
assertEquals(0, parseInGraph.getParentCount());
}
@Test
public void testTreeFilter() throws Exception {
RevCommit c1 = commitFile("file1", "1", "master");
RevCommit c2 = commitFile("file2", "2", "master");
RevCommit c3 = commitFile("file1", "3", "master");
RevCommit c4 = commitFile("file2", "4", "master");
enableAndWriteCommitGraph();
assertCommitCntInGraph(4);
rw.markStart(rw.lookupCommit(c4));
rw.setTreeFilter(AndTreeFilter.create(PathFilter.create("file1"),
TreeFilter.ANY_DIFF));
assertEquals(c3, rw.next());
assertEquals(c1, rw.next());
assertNull(rw.next());
reinitializeRevWalk();
rw.markStart(rw.lookupCommit(c4));
rw.setTreeFilter(AndTreeFilter.create(PathFilter.create("file2"),
TreeFilter.ANY_DIFF));
assertEquals(c4, rw.next());
assertEquals(c2, rw.next());
assertNull(rw.next());
}
@Test
public void testWalkWithCommitMessageFilter() throws Exception {
RevCommit a = commit();
RevCommit b = commitBuilder().parent(a)
.message("The quick brown fox jumps over the lazy dog!")
.create();
RevCommit c = commitBuilder().parent(b).message("commit-c").create();
branch(c, "master");
enableAndWriteCommitGraph();
assertCommitCntInGraph(3);
rw.setRevFilter(MessageRevFilter.create("quick brown fox jumps"));
rw.markStart(rw.lookupCommit(c));
assertEquals(b, rw.next());
assertNull(rw.next());
}
@Test
public void testCommitsWalk() throws Exception {
RevCommit c1 = commit();
branch(c1, "commits/1");
RevCommit c2 = commit(c1);
branch(c2, "commits/2");
RevCommit c3 = commit(c2);
branch(c3, "commits/3");
enableAndWriteCommitGraph();
assertCommitCntInGraph(3);
testRevWalkBehavior("commits/1", "commits/3");
// add more commits
RevCommit c4 = commit(c1);
RevCommit c5 = commit(c4);
RevCommit c6 = commit(c1);
RevCommit c7 = commit(c6);
RevCommit m1 = commit(c2, c4);
branch(m1, "merge/1");
RevCommit m2 = commit(c4, c6);
branch(m2, "merge/2");
RevCommit m3 = commit(c3, c5, c7);
branch(m3, "merge/3");
/*
* <pre>
* current graph structure:
*
* __M3___
* / | \
* 3 M1 5 M2 7
* |/ \|/ \|
* 2 4 6
* |___/____/
* 1
* </pre>
*/
enableAndWriteCommitGraph();
reinitializeRevWalk();
assertCommitCntInGraph(10);
testRevWalkBehavior("merge/1", "merge/2");
testRevWalkBehavior("merge/1", "merge/3");
testRevWalkBehavior("merge/2", "merge/3");
// add one more commit
RevCommit c8 = commit(m3);
branch(c8, "commits/8");
/*
* <pre>
* current graph structure:
* 8
* |
* __M3___
* / | \
* 3 M1 5 M2 7
* |/ \|/ \|
* 2 4 6
* |___/____/
* 1
* </pre>
*/
testRevWalkBehavior("commits/8", "merge/1");
testRevWalkBehavior("commits/8", "merge/2");
enableAndWriteCommitGraph();
reinitializeRevWalk();
assertCommitCntInGraph(11);
testRevWalkBehavior("commits/8", "merge/1");
testRevWalkBehavior("commits/8", "merge/2");
}
void testRevWalkBehavior(String branch, String compare) throws Exception {
assertCommits(
travel(TreeFilter.ALL, RevFilter.MERGE_BASE, RevSort.NONE, true,
branch, compare),
travel(TreeFilter.ALL, RevFilter.MERGE_BASE, RevSort.NONE,
false, branch, compare));
assertCommits(
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, true,
branch),
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, false,
branch));
assertCommits(
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, true,
compare),
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.TOPO, false,
compare));
assertCommits(
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC,
true, branch),
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC,
false, branch));
assertCommits(
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC,
true, compare),
travel(TreeFilter.ALL, RevFilter.ALL, RevSort.COMMIT_TIME_DESC,
false, compare));
}
void assertCommitCntInGraph(int expect) {
assertEquals(expect, rw.commitGraph().getCommitCnt());
}
void assertCommits(List<RevCommit> expect, List<RevCommit> actual) {
assertEquals(expect.size(), actual.size());
for (int i = 0; i < expect.size(); i++) {
RevCommit c1 = expect.get(i);
RevCommit c2 = actual.get(i);
assertEquals(c1.getId(), c2.getId());
assertEquals(c1.getTree(), c2.getTree());
assertEquals(c1.getCommitTime(), c2.getCommitTime());
assertArrayEquals(c1.getParents(), c2.getParents());
assertArrayEquals(c1.getRawBuffer(), c2.getRawBuffer());
}
}
Ref branch(RevCommit commit, String name) throws Exception {
return Git.wrap(db).branchCreate().setName(name)
.setStartPoint(commit.name()).call();
}
List<RevCommit> travel(TreeFilter treeFilter, RevFilter revFilter,
RevSort revSort, boolean enableCommitGraph, String... starts)
throws Exception {
db.getConfig().setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null,
ConfigConstants.CONFIG_COMMIT_GRAPH, enableCommitGraph);
try (RevWalk walk = new RevWalk(db)) {
walk.setTreeFilter(treeFilter);
walk.setRevFilter(revFilter);
walk.sort(revSort);
walk.setRetainBody(false);
for (String start : starts) {
walk.markStart(walk.lookupCommit(db.resolve(start)));
}
List<RevCommit> commits = new ArrayList<>();
if (enableCommitGraph) {
assertTrue(walk.commitGraph().getCommitCnt() > 0);
} else {
assertEquals(EMPTY, walk.commitGraph());
}
for (RevCommit commit : walk) {
commits.add(commit);
}
return commits;
}
}
void enableAndWriteCommitGraph() throws Exception {
db.getConfig().setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null,
ConfigConstants.CONFIG_COMMIT_GRAPH, true);
db.getConfig().setBoolean(ConfigConstants.CONFIG_GC_SECTION, null,
ConfigConstants.CONFIG_KEY_WRITE_COMMIT_GRAPH, true);
GC gc = new GC(db);
gc.gc();
}
private void reinitializeRevWalk() {
rw.close();
rw = new RevWalk(db);
}
}

View File

@ -30,6 +30,33 @@
*/
public interface CommitGraph {
/** Empty {@link CommitGraph} with no results. */
CommitGraph EMPTY = new CommitGraph() {
/** {@inheritDoc} */
@Override
public int findGraphPosition(AnyObjectId commit) {
return -1;
}
/** {@inheritDoc} */
@Override
public CommitData getCommitData(int graphPos) {
return null;
}
/** {@inheritDoc} */
@Override
public ObjectId getObjectId(int graphPos) {
return null;
}
/** {@inheritDoc} */
@Override
public long getCommitCnt() {
return 0;
}
};
/**
* Find the position in the commit-graph of the commit.
* <p>

View File

@ -105,7 +105,12 @@ public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException {
static final RevCommit[] NO_PARENTS = {};
private RevTree tree;
/**
* Tree reference of the commit.
*
* @since 6.5
*/
protected RevTree tree;
/**
* Avoid accessing this field directly. Use method
@ -656,6 +661,24 @@ public final List<String> getFooterLines(FooterKey keyName) {
return r;
}
/**
* Get the distance of the commit from the root, as defined in
* {@link org.eclipse.jgit.internal.storage.commitgraph.CommitGraph}
* <p>
* Generation number is
* {@link org.eclipse.jgit.lib.Constants#COMMIT_GENERATION_UNKNOWN} when the
* commit is not in the commit-graph. If a commit-graph file was written by
* a version of Git that did not compute generation numbers, then those
* commits in commit-graph will have generation number represented by
* {@link org.eclipse.jgit.lib.Constants#COMMIT_GENERATION_NOT_COMPUTED}.
*
* @return the generation number
* @since 6.5
*/
int getGeneration() {
return Constants.COMMIT_GENERATION_UNKNOWN;
}
/**
* Reset this commit to allow another RevWalk with the same instances.
* <p>

View File

@ -0,0 +1,93 @@
/*
* Copyright (C) 2023, Tencent.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.revwalk;
import java.io.IOException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
/**
* RevCommit parsed from
* {@link org.eclipse.jgit.internal.storage.commitgraph.CommitGraph}.
*
* @since 6.5
*/
class RevCommitCG extends RevCommit {
private final int graphPosition;
private int generation = Constants.COMMIT_GENERATION_UNKNOWN;
/**
* Create a new commit reference.
*
* @param id
* object name for the commit.
* @param graphPosition
* the position in the commit-graph of the object.
*/
protected RevCommitCG(AnyObjectId id, int graphPosition) {
super(id);
this.graphPosition = graphPosition;
}
/** {@inheritDoc} */
@Override
void parseHeaders(RevWalk walk) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
CommitGraph graph = walk.commitGraph();
CommitGraph.CommitData data = graph.getCommitData(graphPosition);
if (data == null) {
// RevCommitCG was created because we got its graphPosition from
// commit-graph. If now the commit-graph doesn't know about it,
// something went wrong.
throw new IllegalStateException();
}
if (!walk.shallowCommitsInitialized) {
walk.initializeShallowCommits(this);
}
this.tree = walk.lookupTree(data.getTree());
this.commitTime = (int) data.getCommitTime();
this.generation = data.getGeneration();
if (getParents() == null) {
int[] pGraphList = data.getParents();
if (pGraphList.length == 0) {
this.parents = RevCommit.NO_PARENTS;
} else {
RevCommit[] pList = new RevCommit[pGraphList.length];
for (int i = 0; i < pList.length; i++) {
int graphPos = pGraphList[i];
ObjectId objId = graph.getObjectId(graphPos);
pList[i] = walk.lookupCommit(objId, graphPos);
}
this.parents = pList;
}
}
flags |= PARSED;
if (walk.isRetainBody()) {
super.parseBody(walk);
}
}
/** {@inheritDoc} */
@Override
int getGeneration() {
return generation;
}
}

View File

@ -12,6 +12,8 @@
package org.eclipse.jgit.revwalk;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraph.EMPTY;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
@ -30,6 +32,7 @@
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.AsyncObjectLoaderQueue;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
@ -189,6 +192,8 @@ public class RevWalk implements Iterable<RevCommit>, AutoCloseable {
private TreeFilter treeFilter;
private CommitGraph commitGraph;
private boolean retainBody = true;
private boolean rewriteParents = true;
@ -237,6 +242,7 @@ public RevWalk(ObjectReader or) {
filter = RevFilter.ALL;
treeFilter = TreeFilter.ALL;
this.closeReader = closeReader;
commitGraph = null;
}
/**
@ -899,6 +905,29 @@ public RevCommit lookupCommit(AnyObjectId id) {
return c;
}
/**
* This method is intended to be invoked only by {@link RevCommitCG}, in
* order to give commit the correct graphPosition before accessing the
* commit-graph. In this way, the headers of the commit can be obtained in
* constant time.
*
* @param id
* name of the commit object.
* @param graphPos
* the position in the commit-graph of the object.
* @return reference to the commit object. Never null.
* @since 6.5
*/
@NonNull
protected RevCommit lookupCommit(AnyObjectId id, int graphPos) {
RevCommit c = (RevCommit) objects.get(id);
if (c == null) {
c = createCommit(id, graphPos);
objects.add(c);
}
return c;
}
/**
* Locate a reference to a tag without loading it.
* <p>
@ -1135,6 +1164,21 @@ byte[] getCachedBytes(RevObject obj, ObjectLoader ldr)
}
}
/**
* Get the commit-graph.
*
* @return the commit-graph. Never null.
* @since 6.5
*/
@NonNull
CommitGraph commitGraph() {
if (commitGraph == null) {
commitGraph = reader != null ? reader.getCommitGraph().orElse(EMPTY)
: EMPTY;
}
return commitGraph;
}
/**
* Asynchronous object parsing.
*
@ -1650,6 +1694,13 @@ public ObjectWalk toObjectWalkWithSameObjects() {
* @return a new unparsed reference for the object.
*/
protected RevCommit createCommit(AnyObjectId id) {
return createCommit(id, commitGraph().findGraphPosition(id));
}
private RevCommit createCommit(AnyObjectId id, int graphPos) {
if (graphPos >= 0) {
return new RevCommitCG(id, graphPos);
}
return new RevCommit(id);
}