JGit blame very slow for large merge commits that rename files

Adjusted BlameGenerator to filter rename detection with the blame path.
This reduces the running time of the blame computation significantly,
for repositories with massive commits involving renames.

The filtered rename detection is made (internally) available with:
org.eclipse.jgit.internal.diff.FilteredRenameDetector

Bug: 578900
Change-Id: I6580004e81102d685081b8180da1587a35073d36
Signed-off-by: Simeon Andreev <simeon.danailov.andreev@gmail.com>
This commit is contained in:
Simeon Andreev 2022-06-15 22:23:43 +02:00 committed by Andrey Loskutov
parent ba93cd36d8
commit 1a364c49ec
7 changed files with 400 additions and 73 deletions

View File

@ -33,6 +33,7 @@ Import-Package: com.googlecode.javaewah;version="[1.1.6,2.0.0)",
org.eclipse.jgit.ignore;version="[6.3.0,6.4.0)",
org.eclipse.jgit.ignore.internal;version="[6.3.0,6.4.0)",
org.eclipse.jgit.internal;version="[6.3.0,6.4.0)",
org.eclipse.jgit.internal.diff;version="[6.3.0,6.4.0)",
org.eclipse.jgit.internal.diffmergetool;version="[6.3.0,6.4.0)",
org.eclipse.jgit.internal.fsck;version="[6.3.0,6.4.0)",
org.eclipse.jgit.internal.revwalk;version="[6.3.0,6.4.0)",

View File

@ -0,0 +1,101 @@
/*
* Copyright (C) 2022, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import static org.junit.Assert.assertEquals;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.junit.RepositoryTestCase;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.junit.Before;
public abstract class AbstractRenameDetectionTestCase
extends RepositoryTestCase {
protected static final String PATH_A = "src/A";
protected static final String PATH_B = "src/B";
protected static final String PATH_H = "src/H";
protected static final String PATH_Q = "src/Q";
protected TestRepository<Repository> testDb;
@Override
@Before
public void setUp() throws Exception {
super.setUp();
testDb = new TestRepository<>(db);
}
protected ObjectId blob(String content) throws Exception {
return testDb.blob(content).copy();
}
protected static void assertRename(DiffEntry o, DiffEntry n, int score,
DiffEntry rename) {
assertEquals(ChangeType.RENAME, rename.getChangeType());
assertEquals(o.getOldPath(), rename.getOldPath());
assertEquals(n.getNewPath(), rename.getNewPath());
assertEquals(o.getOldMode(), rename.getOldMode());
assertEquals(n.getNewMode(), rename.getNewMode());
assertEquals(o.getOldId(), rename.getOldId());
assertEquals(n.getNewId(), rename.getNewId());
assertEquals(score, rename.getScore());
}
protected static void assertCopy(DiffEntry o, DiffEntry n, int score,
DiffEntry copy) {
assertEquals(ChangeType.COPY, copy.getChangeType());
assertEquals(o.getOldPath(), copy.getOldPath());
assertEquals(n.getNewPath(), copy.getNewPath());
assertEquals(o.getOldMode(), copy.getOldMode());
assertEquals(n.getNewMode(), copy.getNewMode());
assertEquals(o.getOldId(), copy.getOldId());
assertEquals(n.getNewId(), copy.getNewId());
assertEquals(score, copy.getScore());
}
protected static void assertAdd(String newName, ObjectId newId,
FileMode newMode, DiffEntry add) {
assertEquals(DiffEntry.DEV_NULL, add.oldPath);
assertEquals(DiffEntry.A_ZERO, add.oldId);
assertEquals(FileMode.MISSING, add.oldMode);
assertEquals(ChangeType.ADD, add.changeType);
assertEquals(newName, add.newPath);
assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId);
assertEquals(newMode, add.newMode);
}
protected static void assertDelete(String oldName, ObjectId oldId,
FileMode oldMode, DiffEntry delete) {
assertEquals(DiffEntry.DEV_NULL, delete.newPath);
assertEquals(DiffEntry.A_ZERO, delete.newId);
assertEquals(FileMode.MISSING, delete.newMode);
assertEquals(ChangeType.DELETE, delete.changeType);
assertEquals(oldName, delete.oldPath);
assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId);
assertEquals(oldMode, delete.oldMode);
}
}

View File

@ -0,0 +1,154 @@
/*
* Copyright (C) 2022, Simeon Andreev and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import java.util.Arrays;
import java.util.List;
import org.eclipse.jgit.internal.diff.FilteredRenameDetector;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.treewalk.filter.PathFilter;
import org.junit.Before;
import org.junit.Test;
public class FilteredRenameDetectorTest extends AbstractRenameDetectionTestCase {
private FilteredRenameDetector frd;
@Override
@Before
public void setUp() throws Exception {
super.setUp();
frd = new FilteredRenameDetector(db);
}
@Test
public void testExactRename() throws Exception {
ObjectId foo = blob("foo");
ObjectId bar = blob("bar");
DiffEntry a = DiffEntry.add(PATH_A, foo);
DiffEntry b = DiffEntry.delete(PATH_Q, foo);
DiffEntry c = DiffEntry.add(PATH_H, bar);
DiffEntry d = DiffEntry.delete(PATH_B, bar);
List<DiffEntry> changes = Arrays.asList(a, b, c, d);
PathFilter filter = PathFilter.create(PATH_A);
List<DiffEntry> entries = frd.compute(changes, filter);
assertEquals("Unexpected entries in: " + entries, 1, entries.size());
assertRename(b, a, 100, entries.get(0));
}
@Test
public void testExactRename_multipleFilters() throws Exception {
ObjectId foo = blob("foo");
ObjectId bar = blob("bar");
DiffEntry a = DiffEntry.add(PATH_A, foo);
DiffEntry b = DiffEntry.delete(PATH_Q, foo);
DiffEntry c = DiffEntry.add(PATH_H, bar);
DiffEntry d = DiffEntry.delete(PATH_B, bar);
List<DiffEntry> changes = Arrays.asList(a, b, c, d);
List<PathFilter> filters = Arrays.asList(PathFilter.create(PATH_A),
PathFilter.create(PATH_H));
List<DiffEntry> entries = frd.compute(changes, filters);
assertEquals("Unexpected entries in: " + entries, 2, entries.size());
assertRename(b, a, 100, entries.get(0));
assertRename(d, c, 100, entries.get(1));
}
@Test
public void testInexactRename() throws Exception {
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
ObjectId cId = blob("some\nsort\nof\ntext\n");
ObjectId dId = blob("completely\nunrelated\ntext\n");
DiffEntry c = DiffEntry.add(PATH_B, cId);
DiffEntry d = DiffEntry.delete(PATH_H, dId);
List<DiffEntry> changes = Arrays.asList(a, b, c, d);
PathFilter filter = PathFilter.create(PATH_A);
List<DiffEntry> entries = frd.compute(changes, filter);
assertEquals("Unexpected entries: " + entries, 1, entries.size());
assertRename(b, a, 66, entries.get(0));
}
@Test
public void testInexactRename_multipleFilters() throws Exception {
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
ObjectId cId = blob("some\nsort\nof\ntext\n");
ObjectId dId = blob("completely\nunrelated\ntext\n");
DiffEntry c = DiffEntry.add(PATH_B, cId);
DiffEntry d = DiffEntry.delete(PATH_H, dId);
List<DiffEntry> changes = Arrays.asList(a, b, c, d);
List<PathFilter> filters = Arrays.asList(PathFilter.create(PATH_A),
PathFilter.create(PATH_H));
List<DiffEntry> entries = frd.compute(changes, filters);
assertEquals("Unexpected entries: " + entries, 2, entries.size());
assertRename(b, a, 66, entries.get(0));
assertSame(d, entries.get(1));
}
@Test
public void testNoRenames() throws Exception {
ObjectId aId = blob("");
ObjectId bId = blob("blah1");
ObjectId cId = blob("");
ObjectId dId = blob("blah2");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
DiffEntry c = DiffEntry.add(PATH_H, cId);
DiffEntry d = DiffEntry.delete(PATH_B, dId);
List<DiffEntry> changes = Arrays.asList(a, b, c, d);
PathFilter filter = PathFilter.create(PATH_A);
List<DiffEntry> entries = frd.compute(changes, filter);
assertEquals("Unexpected entries in: " + entries, 1, entries.size());
assertSame(a, entries.get(0));
}
@Test
public void testNoRenames_multipleFilters() throws Exception {
ObjectId aId = blob("");
ObjectId bId = blob("blah1");
ObjectId cId = blob("");
ObjectId dId = blob("blah2");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
DiffEntry c = DiffEntry.add(PATH_H, cId);
DiffEntry d = DiffEntry.delete(PATH_B, dId);
List<DiffEntry> changes = Arrays.asList(a, b, c, d);
List<PathFilter> filters = Arrays.asList(PathFilter.create(PATH_A),
PathFilter.create(PATH_H));
List<DiffEntry> entries = frd.compute(changes, filters);
assertEquals("Unexpected entries in: " + entries, 2, entries.size());
assertSame(a, entries.get(0));
assertSame(c, entries.get(1));
}
}

View File

@ -18,31 +18,20 @@
import java.util.Arrays;
import java.util.List;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.junit.RepositoryTestCase;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.junit.Before;
import org.junit.Test;
public class RenameDetectorTest extends RepositoryTestCase {
private static final String PATH_A = "src/A";
private static final String PATH_B = "src/B";
private static final String PATH_H = "src/H";
private static final String PATH_Q = "src/Q";
public class RenameDetectorTest extends AbstractRenameDetectionTestCase {
private RenameDetector rd;
private TestRepository<Repository> testDb;
@Override
@Before
public void setUp() throws Exception {
super.setUp();
testDb = new TestRepository<>(db);
rd = new RenameDetector(db);
}
@ -675,62 +664,4 @@ public void testRenameLimit() throws Exception {
assertSame(c, entries.get(2));
assertSame(d, entries.get(3));
}
private ObjectId blob(String content) throws Exception {
return testDb.blob(content).copy();
}
private static void assertRename(DiffEntry o, DiffEntry n, int score,
DiffEntry rename) {
assertEquals(ChangeType.RENAME, rename.getChangeType());
assertEquals(o.getOldPath(), rename.getOldPath());
assertEquals(n.getNewPath(), rename.getNewPath());
assertEquals(o.getOldMode(), rename.getOldMode());
assertEquals(n.getNewMode(), rename.getNewMode());
assertEquals(o.getOldId(), rename.getOldId());
assertEquals(n.getNewId(), rename.getNewId());
assertEquals(score, rename.getScore());
}
private static void assertCopy(DiffEntry o, DiffEntry n, int score,
DiffEntry copy) {
assertEquals(ChangeType.COPY, copy.getChangeType());
assertEquals(o.getOldPath(), copy.getOldPath());
assertEquals(n.getNewPath(), copy.getNewPath());
assertEquals(o.getOldMode(), copy.getOldMode());
assertEquals(n.getNewMode(), copy.getNewMode());
assertEquals(o.getOldId(), copy.getOldId());
assertEquals(n.getNewId(), copy.getNewId());
assertEquals(score, copy.getScore());
}
private static void assertAdd(String newName, ObjectId newId,
FileMode newMode, DiffEntry add) {
assertEquals(DiffEntry.DEV_NULL, add.oldPath);
assertEquals(DiffEntry.A_ZERO, add.oldId);
assertEquals(FileMode.MISSING, add.oldMode);
assertEquals(ChangeType.ADD, add.changeType);
assertEquals(newName, add.newPath);
assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId);
assertEquals(newMode, add.newMode);
}
private static void assertDelete(String oldName, ObjectId oldId,
FileMode oldMode, DiffEntry delete) {
assertEquals(DiffEntry.DEV_NULL, delete.newPath);
assertEquals(DiffEntry.A_ZERO, delete.newId);
assertEquals(FileMode.MISSING, delete.newMode);
assertEquals(ChangeType.DELETE, delete.changeType);
assertEquals(oldName, delete.oldPath);
assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId);
assertEquals(oldMode, delete.oldMode);
}
}

View File

@ -70,6 +70,8 @@ Export-Package: org.eclipse.jgit.annotations;version="6.3.0",
org.eclipse.jgit.internal;version="6.3.0";
x-friends:="org.eclipse.jgit.test,
org.eclipse.jgit.http.test",
org.eclipse.jgit.internal.diff;version="6.3.0";
x-friends:="org.eclipse.jgit.test",
org.eclipse.jgit.internal.diffmergetool;version="6.3.0";
x-friends:="org.eclipse.jgit.test,
org.eclipse.jgit.pgm.test,

View File

@ -41,6 +41,7 @@
import org.eclipse.jgit.dircache.DirCacheIterator;
import org.eclipse.jgit.errors.NoWorkTreeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.diff.FilteredRenameDetector;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.MutableObjectId;
@ -1109,9 +1110,10 @@ private DiffEntry findRename(RevCommit parent, RevCommit commit,
treeWalk.setFilter(TreeFilter.ANY_DIFF);
treeWalk.reset(parent.getTree(), commit.getTree());
renameDetector.reset();
renameDetector.addAll(DiffEntry.scan(treeWalk));
for (DiffEntry ent : renameDetector.compute()) {
List<DiffEntry> diffs = DiffEntry.scan(treeWalk);
FilteredRenameDetector filteredRenameDetector = new FilteredRenameDetector(
renameDetector);
for (DiffEntry ent : filteredRenameDetector.compute(diffs, path)) {
if (isRename(ent) && ent.getNewPath().equals(path.getPath()))
return ent;
}

View File

@ -0,0 +1,136 @@
/*
* Copyright (C) 2022, Simeon Andreev and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.diff;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.diff.RenameDetector;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.treewalk.filter.PathFilter;
/**
* Provides rename detection in special cases such as blame, where only a subset
* of the renames detected by {@link RenameDetector} is of interest.
*/
public class FilteredRenameDetector {
private final RenameDetector renameDetector;
/**
* @param repository
* The repository in which to check for renames.
*/
public FilteredRenameDetector(Repository repository) {
this(new RenameDetector(repository));
}
/**
* @param renameDetector
* The {@link RenameDetector} to use when checking for renames.
*/
public FilteredRenameDetector(RenameDetector renameDetector) {
this.renameDetector = renameDetector;
}
/**
* @param diffs
* The set of changes to check.
* @param pathFilter
* Filter out changes that didn't affect this path.
* @return The subset of changes that affect only the filtered path.
* @throws IOException
*/
public List<DiffEntry> compute(List<DiffEntry> diffs,
PathFilter pathFilter) throws IOException {
return compute(diffs, Arrays.asList(pathFilter));
}
/**
* Tries to avoid computation overhead in {@link RenameDetector#compute()}
* by filtering diffs related to the path filters only.
* <p>
* Note: current implementation only optimizes added or removed diffs,
* further optimization is possible.
*
* @param changes
* The set of changes to check.
* @param pathFilters
* Filter out changes that didn't affect these paths.
* @return The subset of changes that affect only the filtered paths.
* @throws IOException
* @see RenameDetector#compute()
*/
public List<DiffEntry> compute(List<DiffEntry> changes,
List<PathFilter> pathFilters) throws IOException {
if (pathFilters == null) {
throw new IllegalArgumentException("Must specify path filters"); //$NON-NLS-1$
}
Set<String> paths = new HashSet<>(pathFilters.size());
for (PathFilter pathFilter : pathFilters) {
paths.add(pathFilter.getPath());
}
List<DiffEntry> filtered = new ArrayList<>();
// For new path: skip ADD's that don't match given paths
for (DiffEntry diff : changes) {
ChangeType changeType = diff.getChangeType();
if (changeType != ChangeType.ADD
|| paths.contains(diff.getNewPath())) {
filtered.add(diff);
}
}
renameDetector.reset();
renameDetector.addAll(filtered);
List<DiffEntry> sourceChanges = renameDetector.compute();
filtered.clear();
// For old path: skip DELETE's that don't match given paths
for (DiffEntry diff : changes) {
ChangeType changeType = diff.getChangeType();
if (changeType != ChangeType.DELETE
|| paths.contains(diff.getOldPath())) {
filtered.add(diff);
}
}
renameDetector.reset();
renameDetector.addAll(filtered);
List<DiffEntry> targetChanges = renameDetector.compute();
List<DiffEntry> result = new ArrayList<>();
for (DiffEntry sourceChange : sourceChanges) {
if (paths.contains(sourceChange.getNewPath())) {
result.add(sourceChange);
}
}
for (DiffEntry targetChange : targetChanges) {
if (paths.contains(targetChange.getOldPath())) {
result.add(targetChange);
}
}
renameDetector.reset();
return result;
}
}