From d3b40e72acd30b8842da8f450775a5c847ad20ef Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 2 May 2023 10:44:16 -0700 Subject: [PATCH] RevWalk: use changed path filters Teach RevWalk, TreeRevFilter, PathFilter, and FollowFilter to use changed path filters, whenever available, to speed revision walks by skipping commits that fail the changed path filter. This work is based on earlier work by Kyle Zhao (I441be984b609669cff77617ecfc838b080ce0816). Change-Id: I7396f70241e571c63aabe337f6de1b8b9800f7ed Signed-off-by: kylezhao Signed-off-by: Jonathan Tan --- .../jgit/revwalk/RevWalkCommitGraphTest.java | 67 +++++++++++++++ .../eclipse/jgit/revwalk/FollowFilter.java | 8 ++ .../org/eclipse/jgit/revwalk/RevCommit.java | 15 ++++ .../org/eclipse/jgit/revwalk/RevCommitCG.java | 15 +++- .../src/org/eclipse/jgit/revwalk/RevWalk.java | 3 +- .../eclipse/jgit/revwalk/TreeRevFilter.java | 81 +++++++++++++++++-- .../jgit/treewalk/filter/PathFilter.java | 14 +++- .../jgit/treewalk/filter/TreeFilter.java | 21 ++++- 8 files changed, 211 insertions(+), 13 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java index 3cc036894..9809d3405 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java @@ -26,6 +26,7 @@ import java.util.List; import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.internal.storage.file.GC; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ConfigConstants; @@ -166,6 +167,72 @@ public void testTreeFilter() throws Exception { assertNull(rw.next()); } + @Test + public void testChangedPathFilter() throws Exception { + RevCommit c1 = commitFile("file1", "1", "master"); + commitFile("file2", "2", "master"); + RevCommit c3 = commitFile("file1", "3", "master"); + RevCommit c4 = commitFile("file2", "4", "master"); + + enableAndWriteCommitGraph(); + + TreeRevFilter trf = new TreeRevFilter(rw, PathFilter.create("file1")); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c3, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // 1 commit that has exactly one parent and matches path + assertEquals(1, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // 2 commits that have exactly one parent and don't match path + assertEquals(2, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilterWithFollowFilter() throws Exception { + RevCommit c0 = commit(tree()); + RevCommit c1 = commit(tree(file("file", blob("contents"))), c0); + RevCommit c2 = commit(tree(file("file", blob("contents")), + file("unrelated", blob("unrelated change"))), c1); + RevCommit c3 = commit(tree(file("renamed-file", blob("contents")), + file("unrelated", blob("unrelated change"))), c2); + RevCommit c4 = commit( + tree(file("renamed-file", blob("contents")), + file("unrelated", blob("another unrelated change"))), + c3); + branch(c4, "master"); + + enableAndWriteCommitGraph(); + + db.getConfig().setString(ConfigConstants.CONFIG_DIFF_SECTION, null, + ConfigConstants.CONFIG_KEY_RENAMES, "true"); + + TreeRevFilter trf = new TreeRevFilter(rw, + new FollowFilter(PathFilter.create("renamed-file"), + db.getConfig().get(DiffConfig.KEY))); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c3, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // Path "renamed-file" is in c3's bloom filter, and another path "file" + // is in c1's bloom filter (we know of "file" because the rev walk + // detected that "renamed-file" is a renaming of "file") + assertEquals(2, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // 2 commits that have exactly one parent and don't match path + assertEquals(2, trf.getChangedPathFilterNegative()); + } + @Test public void testWalkWithCommitMessageFilter() throws Exception { RevCommit a = commit(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java index 14a34375c..35ef51f4f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java @@ -11,6 +11,8 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; +import java.util.Optional; +import java.util.Set; import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.errors.IncorrectObjectTypeException; @@ -89,6 +91,12 @@ public boolean shouldBeRecursive() { return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive(); } + @Override + public Optional> getPathsBestEffort() { + return path.getPathsBestEffort(); + } + + /** {@inheritDoc} */ @Override public TreeFilter clone() { return new FollowFilter(path.clone(), cfg); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java index e32f28ebb..316aeda4f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java @@ -25,6 +25,7 @@ import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.MutableObjectId; @@ -686,6 +687,20 @@ int getGeneration() { return Constants.COMMIT_GENERATION_UNKNOWN; } + /** + * Get the changed path filter of the commit. + *

+ * This is null when there is no commit graph file, the commit is not in the + * commit graph file, or the commit graph file was generated without changed + * path filters. + * + * @return the changed path filter + * @since 6.7 + */ + ChangedPathFilter getChangedPathFilter() { + return null; + } + /** * Reset this commit to allow another RevWalk with the same instances. *

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java index 3846ca579..bfc0c59aa 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java @@ -14,6 +14,7 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; @@ -29,6 +30,8 @@ class RevCommitCG extends RevCommit { private final int graphPosition; + private final ChangedPathFilter changedPathFilter; + private int generation = Constants.COMMIT_GENERATION_UNKNOWN; /** @@ -38,10 +41,14 @@ class RevCommitCG extends RevCommit { * object name for the commit. * @param graphPosition * the position in the commit-graph of the object. + * @param changedPathFilter + * the changed path filter if one exists */ - protected RevCommitCG(AnyObjectId id, int graphPosition) { + protected RevCommitCG(AnyObjectId id, int graphPosition, + ChangedPathFilter changedPathFilter) { super(id); this.graphPosition = graphPosition; + this.changedPathFilter = changedPathFilter; } @Override @@ -100,4 +107,10 @@ private void parseInGraph(RevWalk walk) throws IOException { int getGeneration() { return generation; } + + /** {@inheritDoc} */ + @Override + ChangedPathFilter getChangedPathFilter() { + return changedPathFilter; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java index 27a09f495..f4bf710ed 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java @@ -1713,7 +1713,8 @@ protected RevCommit createCommit(AnyObjectId id) { private RevCommit createCommit(AnyObjectId id, int graphPos) { if (graphPos >= 0) { - return new RevCommitCG(id, graphPos); + return new RevCommitCG(id, graphPos, + commitGraph().getChangedPathFilter(graphPos)); } return new RevCommit(id); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java index 98bb5884f..017a825ac 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java @@ -12,7 +12,10 @@ import java.io.IOException; import java.util.List; +import java.util.Optional; +import java.util.Set; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffEntry.ChangeType; @@ -47,6 +50,12 @@ public class TreeRevFilter extends RevFilter { private final TreeWalk pathFilter; + private long changedPathFilterTruePositive = 0; + + private long changedPathFilterFalsePositive = 0; + + private long changedPathFilterNegative = 0; + /** * Create a {@link org.eclipse.jgit.revwalk.filter.RevFilter} from a * {@link org.eclipse.jgit.treewalk.filter.TreeFilter}. @@ -122,12 +131,41 @@ public boolean include(RevWalk walker, RevCommit c) // We have exactly one parent. This is a very common case. // int chgs = 0, adds = 0; - while (tw.next()) { - chgs++; - if (tw.getRawMode(0) == 0 && tw.getRawMode(1) != 0) { - adds++; - } else { - break; // no point in looking at this further. + boolean changedPathFilterUsed = false; + boolean mustCalculateChgs = true; + ChangedPathFilter cpf = c.getChangedPathFilter(); + if (cpf != null) { + Optional> paths = pathFilter.getFilter() + .getPathsBestEffort(); + if (paths.isPresent()) { + changedPathFilterUsed = true; + for (byte[] path : paths.get()) { + if (!cpf.maybeContains(path)) { + mustCalculateChgs = false; + break; + } + } + } + } + if (mustCalculateChgs) { + while (tw.next()) { + chgs++; + if (tw.getRawMode(0) == 0 && tw.getRawMode(1) != 0) { + adds++; + } else { + break; // no point in looking at this further. + } + } + if (changedPathFilterUsed) { + if (chgs > 0) { + changedPathFilterTruePositive++; + } else { + changedPathFilterFalsePositive++; + } + } + } else { + if (changedPathFilterUsed) { + changedPathFilterNegative++; } } @@ -244,6 +282,37 @@ public boolean requiresCommitBody() { return false; } + /** + * Return how many times a changed path filter correctly predicted that a + * path was changed in a commit, for statistics gathering purposes. + * + * @return count of true positives + */ + public long getChangedPathFilterTruePositive() { + return changedPathFilterTruePositive; + } + + /** + * Return how many times a changed path filter wrongly predicted that a path + * was changed in a commit, for statistics gathering purposes. + * + * @return count of false positives + */ + public long getChangedPathFilterFalsePositive() { + return changedPathFilterFalsePositive; + } + + /** + * Return how many times a changed path filter predicted that a path was not + * changed in a commit (allowing that commit to be skipped), for statistics + * gathering purposes. + * + * @return count of negatives + */ + public long getChangedPathFilterNegative() { + return changedPathFilterNegative; + } + private void updateFollowFilter(ObjectId[] trees, DiffConfig cfg) throws MissingObjectException, IncorrectObjectTypeException, CorruptObjectException, IOException { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java index 63e587ab2..66ab8bb5b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java @@ -11,6 +11,9 @@ package org.eclipse.jgit.treewalk.filter; +import java.util.Optional; +import java.util.Set; +import java.util.HashSet; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.treewalk.TreeWalk; @@ -45,7 +48,8 @@ public static PathFilter create(String path) { while (path.endsWith("/")) //$NON-NLS-1$ path = path.substring(0, path.length() - 1); if (path.length() == 0) - throw new IllegalArgumentException(JGitText.get().emptyPathNotPermitted); + throw new IllegalArgumentException( + JGitText.get().emptyPathNotPermitted); return new PathFilter(path); } @@ -85,6 +89,14 @@ public boolean shouldBeRecursive() { return false; } + @Override + public Optional> getPathsBestEffort() { + HashSet s = new HashSet<>(); + s.add(pathRaw); + return Optional.of(s); + } + + /** {@inheritDoc} */ @Override public PathFilter clone() { return this; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java index 28f2cd946..22d430bc2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java @@ -11,6 +11,8 @@ package org.eclipse.jgit.treewalk.filter; import java.io.IOException; +import java.util.Optional; +import java.util.Set; import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.IncorrectObjectTypeException; @@ -188,10 +190,8 @@ public abstract boolean include(TreeWalk walker) * as thrown by {@link #include(TreeWalk)} * @since 4.7 */ - public int matchFilter(TreeWalk walker) - throws MissingObjectException, IncorrectObjectTypeException, - IOException - { + public int matchFilter(TreeWalk walker) throws MissingObjectException, + IncorrectObjectTypeException, IOException { return include(walker) ? 0 : 1; } @@ -209,6 +209,19 @@ public int matchFilter(TreeWalk walker) */ public abstract boolean shouldBeRecursive(); + /** + * If this filter checks that a specific set of paths have all been + * modified, returns that set of paths to be checked against a changed path + * filter. Otherwise, returns empty. + * + * @return a set of paths, or empty + * + * @since 6.7 + */ + public Optional> getPathsBestEffort() { + return Optional.empty(); + } + /** * {@inheritDoc} *