RevWalk: use changed path filters
Teach RevWalk, TreeRevFilter, PathFilter, and FollowFilter to use changed path filters, whenever available, to speed revision walks by skipping commits that fail the changed path filter. This work is based on earlier work by Kyle Zhao (I441be984b609669cff77617ecfc838b080ce0816). Change-Id: I7396f70241e571c63aabe337f6de1b8b9800f7ed Signed-off-by: kylezhao <kylezhao@tencent.com> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
This commit is contained in:
parent
ff0f7c174f
commit
d3b40e72ac
|
@ -26,6 +26,7 @@
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.eclipse.jgit.api.Git;
|
import org.eclipse.jgit.api.Git;
|
||||||
|
import org.eclipse.jgit.diff.DiffConfig;
|
||||||
import org.eclipse.jgit.internal.storage.file.GC;
|
import org.eclipse.jgit.internal.storage.file.GC;
|
||||||
import org.eclipse.jgit.lib.AnyObjectId;
|
import org.eclipse.jgit.lib.AnyObjectId;
|
||||||
import org.eclipse.jgit.lib.ConfigConstants;
|
import org.eclipse.jgit.lib.ConfigConstants;
|
||||||
|
@ -166,6 +167,72 @@ public void testTreeFilter() throws Exception {
|
||||||
assertNull(rw.next());
|
assertNull(rw.next());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChangedPathFilter() throws Exception {
|
||||||
|
RevCommit c1 = commitFile("file1", "1", "master");
|
||||||
|
commitFile("file2", "2", "master");
|
||||||
|
RevCommit c3 = commitFile("file1", "3", "master");
|
||||||
|
RevCommit c4 = commitFile("file2", "4", "master");
|
||||||
|
|
||||||
|
enableAndWriteCommitGraph();
|
||||||
|
|
||||||
|
TreeRevFilter trf = new TreeRevFilter(rw, PathFilter.create("file1"));
|
||||||
|
rw.markStart(rw.lookupCommit(c4));
|
||||||
|
rw.setRevFilter(trf);
|
||||||
|
assertEquals(c3, rw.next());
|
||||||
|
assertEquals(c1, rw.next());
|
||||||
|
assertNull(rw.next());
|
||||||
|
|
||||||
|
// 1 commit that has exactly one parent and matches path
|
||||||
|
assertEquals(1, trf.getChangedPathFilterTruePositive());
|
||||||
|
|
||||||
|
// No false positives
|
||||||
|
assertEquals(0, trf.getChangedPathFilterFalsePositive());
|
||||||
|
|
||||||
|
// 2 commits that have exactly one parent and don't match path
|
||||||
|
assertEquals(2, trf.getChangedPathFilterNegative());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChangedPathFilterWithFollowFilter() throws Exception {
|
||||||
|
RevCommit c0 = commit(tree());
|
||||||
|
RevCommit c1 = commit(tree(file("file", blob("contents"))), c0);
|
||||||
|
RevCommit c2 = commit(tree(file("file", blob("contents")),
|
||||||
|
file("unrelated", blob("unrelated change"))), c1);
|
||||||
|
RevCommit c3 = commit(tree(file("renamed-file", blob("contents")),
|
||||||
|
file("unrelated", blob("unrelated change"))), c2);
|
||||||
|
RevCommit c4 = commit(
|
||||||
|
tree(file("renamed-file", blob("contents")),
|
||||||
|
file("unrelated", blob("another unrelated change"))),
|
||||||
|
c3);
|
||||||
|
branch(c4, "master");
|
||||||
|
|
||||||
|
enableAndWriteCommitGraph();
|
||||||
|
|
||||||
|
db.getConfig().setString(ConfigConstants.CONFIG_DIFF_SECTION, null,
|
||||||
|
ConfigConstants.CONFIG_KEY_RENAMES, "true");
|
||||||
|
|
||||||
|
TreeRevFilter trf = new TreeRevFilter(rw,
|
||||||
|
new FollowFilter(PathFilter.create("renamed-file"),
|
||||||
|
db.getConfig().get(DiffConfig.KEY)));
|
||||||
|
rw.markStart(rw.lookupCommit(c4));
|
||||||
|
rw.setRevFilter(trf);
|
||||||
|
assertEquals(c3, rw.next());
|
||||||
|
assertEquals(c1, rw.next());
|
||||||
|
assertNull(rw.next());
|
||||||
|
|
||||||
|
// Path "renamed-file" is in c3's bloom filter, and another path "file"
|
||||||
|
// is in c1's bloom filter (we know of "file" because the rev walk
|
||||||
|
// detected that "renamed-file" is a renaming of "file")
|
||||||
|
assertEquals(2, trf.getChangedPathFilterTruePositive());
|
||||||
|
|
||||||
|
// No false positives
|
||||||
|
assertEquals(0, trf.getChangedPathFilterFalsePositive());
|
||||||
|
|
||||||
|
// 2 commits that have exactly one parent and don't match path
|
||||||
|
assertEquals(2, trf.getChangedPathFilterNegative());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWalkWithCommitMessageFilter() throws Exception {
|
public void testWalkWithCommitMessageFilter() throws Exception {
|
||||||
RevCommit a = commit();
|
RevCommit a = commit();
|
||||||
|
|
|
@ -11,6 +11,8 @@
|
||||||
package org.eclipse.jgit.revwalk;
|
package org.eclipse.jgit.revwalk;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.eclipse.jgit.diff.DiffConfig;
|
import org.eclipse.jgit.diff.DiffConfig;
|
||||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||||
|
@ -89,6 +91,12 @@ public boolean shouldBeRecursive() {
|
||||||
return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive();
|
return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Set<byte[]>> getPathsBestEffort() {
|
||||||
|
return path.getPathsBestEffort();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public TreeFilter clone() {
|
public TreeFilter clone() {
|
||||||
return new FollowFilter(path.clone(), cfg);
|
return new FollowFilter(path.clone(), cfg);
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
import org.eclipse.jgit.annotations.Nullable;
|
import org.eclipse.jgit.annotations.Nullable;
|
||||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||||
import org.eclipse.jgit.errors.MissingObjectException;
|
import org.eclipse.jgit.errors.MissingObjectException;
|
||||||
|
import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter;
|
||||||
import org.eclipse.jgit.lib.AnyObjectId;
|
import org.eclipse.jgit.lib.AnyObjectId;
|
||||||
import org.eclipse.jgit.lib.Constants;
|
import org.eclipse.jgit.lib.Constants;
|
||||||
import org.eclipse.jgit.lib.MutableObjectId;
|
import org.eclipse.jgit.lib.MutableObjectId;
|
||||||
|
@ -686,6 +687,20 @@ int getGeneration() {
|
||||||
return Constants.COMMIT_GENERATION_UNKNOWN;
|
return Constants.COMMIT_GENERATION_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the changed path filter of the commit.
|
||||||
|
* <p>
|
||||||
|
* This is null when there is no commit graph file, the commit is not in the
|
||||||
|
* commit graph file, or the commit graph file was generated without changed
|
||||||
|
* path filters.
|
||||||
|
*
|
||||||
|
* @return the changed path filter
|
||||||
|
* @since 6.7
|
||||||
|
*/
|
||||||
|
ChangedPathFilter getChangedPathFilter() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reset this commit to allow another RevWalk with the same instances.
|
* Reset this commit to allow another RevWalk with the same instances.
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||||
import org.eclipse.jgit.errors.MissingObjectException;
|
import org.eclipse.jgit.errors.MissingObjectException;
|
||||||
|
import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter;
|
||||||
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
|
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
|
||||||
import org.eclipse.jgit.lib.AnyObjectId;
|
import org.eclipse.jgit.lib.AnyObjectId;
|
||||||
import org.eclipse.jgit.lib.Constants;
|
import org.eclipse.jgit.lib.Constants;
|
||||||
|
@ -29,6 +30,8 @@ class RevCommitCG extends RevCommit {
|
||||||
|
|
||||||
private final int graphPosition;
|
private final int graphPosition;
|
||||||
|
|
||||||
|
private final ChangedPathFilter changedPathFilter;
|
||||||
|
|
||||||
private int generation = Constants.COMMIT_GENERATION_UNKNOWN;
|
private int generation = Constants.COMMIT_GENERATION_UNKNOWN;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -38,10 +41,14 @@ class RevCommitCG extends RevCommit {
|
||||||
* object name for the commit.
|
* object name for the commit.
|
||||||
* @param graphPosition
|
* @param graphPosition
|
||||||
* the position in the commit-graph of the object.
|
* the position in the commit-graph of the object.
|
||||||
|
* @param changedPathFilter
|
||||||
|
* the changed path filter if one exists
|
||||||
*/
|
*/
|
||||||
protected RevCommitCG(AnyObjectId id, int graphPosition) {
|
protected RevCommitCG(AnyObjectId id, int graphPosition,
|
||||||
|
ChangedPathFilter changedPathFilter) {
|
||||||
super(id);
|
super(id);
|
||||||
this.graphPosition = graphPosition;
|
this.graphPosition = graphPosition;
|
||||||
|
this.changedPathFilter = changedPathFilter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -100,4 +107,10 @@ private void parseInGraph(RevWalk walk) throws IOException {
|
||||||
int getGeneration() {
|
int getGeneration() {
|
||||||
return generation;
|
return generation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
|
@Override
|
||||||
|
ChangedPathFilter getChangedPathFilter() {
|
||||||
|
return changedPathFilter;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1713,7 +1713,8 @@ protected RevCommit createCommit(AnyObjectId id) {
|
||||||
|
|
||||||
private RevCommit createCommit(AnyObjectId id, int graphPos) {
|
private RevCommit createCommit(AnyObjectId id, int graphPos) {
|
||||||
if (graphPos >= 0) {
|
if (graphPos >= 0) {
|
||||||
return new RevCommitCG(id, graphPos);
|
return new RevCommitCG(id, graphPos,
|
||||||
|
commitGraph().getChangedPathFilter(graphPos));
|
||||||
}
|
}
|
||||||
return new RevCommit(id);
|
return new RevCommit(id);
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,10 @@
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter;
|
||||||
import org.eclipse.jgit.diff.DiffConfig;
|
import org.eclipse.jgit.diff.DiffConfig;
|
||||||
import org.eclipse.jgit.diff.DiffEntry;
|
import org.eclipse.jgit.diff.DiffEntry;
|
||||||
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
||||||
|
@ -47,6 +50,12 @@ public class TreeRevFilter extends RevFilter {
|
||||||
|
|
||||||
private final TreeWalk pathFilter;
|
private final TreeWalk pathFilter;
|
||||||
|
|
||||||
|
private long changedPathFilterTruePositive = 0;
|
||||||
|
|
||||||
|
private long changedPathFilterFalsePositive = 0;
|
||||||
|
|
||||||
|
private long changedPathFilterNegative = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a {@link org.eclipse.jgit.revwalk.filter.RevFilter} from a
|
* Create a {@link org.eclipse.jgit.revwalk.filter.RevFilter} from a
|
||||||
* {@link org.eclipse.jgit.treewalk.filter.TreeFilter}.
|
* {@link org.eclipse.jgit.treewalk.filter.TreeFilter}.
|
||||||
|
@ -122,6 +131,23 @@ public boolean include(RevWalk walker, RevCommit c)
|
||||||
// We have exactly one parent. This is a very common case.
|
// We have exactly one parent. This is a very common case.
|
||||||
//
|
//
|
||||||
int chgs = 0, adds = 0;
|
int chgs = 0, adds = 0;
|
||||||
|
boolean changedPathFilterUsed = false;
|
||||||
|
boolean mustCalculateChgs = true;
|
||||||
|
ChangedPathFilter cpf = c.getChangedPathFilter();
|
||||||
|
if (cpf != null) {
|
||||||
|
Optional<Set<byte[]>> paths = pathFilter.getFilter()
|
||||||
|
.getPathsBestEffort();
|
||||||
|
if (paths.isPresent()) {
|
||||||
|
changedPathFilterUsed = true;
|
||||||
|
for (byte[] path : paths.get()) {
|
||||||
|
if (!cpf.maybeContains(path)) {
|
||||||
|
mustCalculateChgs = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (mustCalculateChgs) {
|
||||||
while (tw.next()) {
|
while (tw.next()) {
|
||||||
chgs++;
|
chgs++;
|
||||||
if (tw.getRawMode(0) == 0 && tw.getRawMode(1) != 0) {
|
if (tw.getRawMode(0) == 0 && tw.getRawMode(1) != 0) {
|
||||||
|
@ -130,6 +156,18 @@ public boolean include(RevWalk walker, RevCommit c)
|
||||||
break; // no point in looking at this further.
|
break; // no point in looking at this further.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (changedPathFilterUsed) {
|
||||||
|
if (chgs > 0) {
|
||||||
|
changedPathFilterTruePositive++;
|
||||||
|
} else {
|
||||||
|
changedPathFilterFalsePositive++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (changedPathFilterUsed) {
|
||||||
|
changedPathFilterNegative++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (chgs == 0) {
|
if (chgs == 0) {
|
||||||
// No changes, so our tree is effectively the same as
|
// No changes, so our tree is effectively the same as
|
||||||
|
@ -244,6 +282,37 @@ public boolean requiresCommitBody() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return how many times a changed path filter correctly predicted that a
|
||||||
|
* path was changed in a commit, for statistics gathering purposes.
|
||||||
|
*
|
||||||
|
* @return count of true positives
|
||||||
|
*/
|
||||||
|
public long getChangedPathFilterTruePositive() {
|
||||||
|
return changedPathFilterTruePositive;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return how many times a changed path filter wrongly predicted that a path
|
||||||
|
* was changed in a commit, for statistics gathering purposes.
|
||||||
|
*
|
||||||
|
* @return count of false positives
|
||||||
|
*/
|
||||||
|
public long getChangedPathFilterFalsePositive() {
|
||||||
|
return changedPathFilterFalsePositive;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return how many times a changed path filter predicted that a path was not
|
||||||
|
* changed in a commit (allowing that commit to be skipped), for statistics
|
||||||
|
* gathering purposes.
|
||||||
|
*
|
||||||
|
* @return count of negatives
|
||||||
|
*/
|
||||||
|
public long getChangedPathFilterNegative() {
|
||||||
|
return changedPathFilterNegative;
|
||||||
|
}
|
||||||
|
|
||||||
private void updateFollowFilter(ObjectId[] trees, DiffConfig cfg)
|
private void updateFollowFilter(ObjectId[] trees, DiffConfig cfg)
|
||||||
throws MissingObjectException, IncorrectObjectTypeException,
|
throws MissingObjectException, IncorrectObjectTypeException,
|
||||||
CorruptObjectException, IOException {
|
CorruptObjectException, IOException {
|
||||||
|
|
|
@ -11,6 +11,9 @@
|
||||||
|
|
||||||
package org.eclipse.jgit.treewalk.filter;
|
package org.eclipse.jgit.treewalk.filter;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.HashSet;
|
||||||
import org.eclipse.jgit.internal.JGitText;
|
import org.eclipse.jgit.internal.JGitText;
|
||||||
import org.eclipse.jgit.lib.Constants;
|
import org.eclipse.jgit.lib.Constants;
|
||||||
import org.eclipse.jgit.treewalk.TreeWalk;
|
import org.eclipse.jgit.treewalk.TreeWalk;
|
||||||
|
@ -45,7 +48,8 @@ public static PathFilter create(String path) {
|
||||||
while (path.endsWith("/")) //$NON-NLS-1$
|
while (path.endsWith("/")) //$NON-NLS-1$
|
||||||
path = path.substring(0, path.length() - 1);
|
path = path.substring(0, path.length() - 1);
|
||||||
if (path.length() == 0)
|
if (path.length() == 0)
|
||||||
throw new IllegalArgumentException(JGitText.get().emptyPathNotPermitted);
|
throw new IllegalArgumentException(
|
||||||
|
JGitText.get().emptyPathNotPermitted);
|
||||||
return new PathFilter(path);
|
return new PathFilter(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,6 +89,14 @@ public boolean shouldBeRecursive() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Set<byte[]>> getPathsBestEffort() {
|
||||||
|
HashSet<byte[]> s = new HashSet<>();
|
||||||
|
s.add(pathRaw);
|
||||||
|
return Optional.of(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public PathFilter clone() {
|
public PathFilter clone() {
|
||||||
return this;
|
return this;
|
||||||
|
|
|
@ -11,6 +11,8 @@
|
||||||
package org.eclipse.jgit.treewalk.filter;
|
package org.eclipse.jgit.treewalk.filter;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.eclipse.jgit.dircache.DirCacheIterator;
|
import org.eclipse.jgit.dircache.DirCacheIterator;
|
||||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||||
|
@ -188,10 +190,8 @@ public abstract boolean include(TreeWalk walker)
|
||||||
* as thrown by {@link #include(TreeWalk)}
|
* as thrown by {@link #include(TreeWalk)}
|
||||||
* @since 4.7
|
* @since 4.7
|
||||||
*/
|
*/
|
||||||
public int matchFilter(TreeWalk walker)
|
public int matchFilter(TreeWalk walker) throws MissingObjectException,
|
||||||
throws MissingObjectException, IncorrectObjectTypeException,
|
IncorrectObjectTypeException, IOException {
|
||||||
IOException
|
|
||||||
{
|
|
||||||
return include(walker) ? 0 : 1;
|
return include(walker) ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -209,6 +209,19 @@ public int matchFilter(TreeWalk walker)
|
||||||
*/
|
*/
|
||||||
public abstract boolean shouldBeRecursive();
|
public abstract boolean shouldBeRecursive();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this filter checks that a specific set of paths have all been
|
||||||
|
* modified, returns that set of paths to be checked against a changed path
|
||||||
|
* filter. Otherwise, returns empty.
|
||||||
|
*
|
||||||
|
* @return a set of paths, or empty
|
||||||
|
*
|
||||||
|
* @since 6.7
|
||||||
|
*/
|
||||||
|
public Optional<Set<byte[]>> getPathsBestEffort() {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue