Added a preliminary version of rename detection

JGit does not currently do rename detection during diffs. I added
a class that, given a TreeWalk to iterate over, can output a list
of DiffEntry objects for that TreeWalk, taking renames into account.
This class only detects renames by matching SHA-1s. More complex rename
detection, along the lines of what C Git does, will be added later.

Change-Id: I93606ce15da70df6660651ec322ea50718dd7c04
This commit is contained in:
Jeff Schumacher 2010-07-01 15:30:46 -07:00
parent 7b0b4110ed
commit cb8e1e6014
5 changed files with 479 additions and 0 deletions

View File

@ -0,0 +1,204 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.util.List;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.RepositoryTestCase;
public class RenameDetectorTest extends RepositoryTestCase {
RenameDetector rd;
TestRepository testDb;
@Override
public void setUp() throws Exception {
super.setUp();
testDb = new TestRepository(db);
rd = new RenameDetector();
}
public void testGetEntriesAddDelete() throws Exception {
ObjectId foo = testDb.blob("foo").copy();
DiffEntry a = new DiffEntry();
a.newId = AbbreviatedObjectId.fromObjectId(foo);
a.newMode = FileMode.REGULAR_FILE;
a.newName = "some/file.c";
a.changeType = ChangeType.ADD;
DiffEntry b = new DiffEntry();
b.oldId = AbbreviatedObjectId.fromObjectId(foo);
b.oldMode = FileMode.REGULAR_FILE;
b.oldName = "some/other_file.c";
b.changeType = ChangeType.DELETE;
rd.addDiffEntry(a);
rd.addDiffEntry(b);
List<DiffEntry> entries = rd.getEntries();
assertEquals(1, entries.size());
DiffEntry rename = entries.get(0);
assertNotNull(rename);
assertTrue(foo.equals(rename.newId.toObjectId()));
assertTrue(foo.equals(rename.oldId.toObjectId()));
assertEquals(FileMode.REGULAR_FILE, rename.newMode);
assertEquals(FileMode.REGULAR_FILE, rename.oldMode);
assertEquals(ChangeType.RENAME, rename.changeType);
assertEquals("some/file.c", rename.newName);
assertEquals("some/other_file.c", rename.oldName);
}
public void testGetEntriesAddDeleteModify() throws Exception {
ObjectId foo = testDb.blob("foo").copy();
ObjectId bar = testDb.blob("bar").copy();
DiffEntry a = new DiffEntry();
a.newId = AbbreviatedObjectId.fromObjectId(foo);
a.newMode = FileMode.REGULAR_FILE;
a.newName = "some/file.c";
a.changeType = ChangeType.ADD;
DiffEntry b = new DiffEntry();
b.oldId = AbbreviatedObjectId.fromObjectId(foo);
b.oldMode = FileMode.REGULAR_FILE;
b.oldName = "some/other_file.c";
b.changeType = ChangeType.DELETE;
DiffEntry c = new DiffEntry();
c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar);
c.newMode = c.oldMode = FileMode.REGULAR_FILE;
c.newName = c.oldName = "some/header.h";
c.changeType = ChangeType.MODIFY;
rd.addDiffEntry(a);
rd.addDiffEntry(b);
rd.addDiffEntry(c);
List<DiffEntry> entries = rd.getEntries();
assertEquals(2, entries.size());
// The renamed change should be first because the output should be
// sorted by newName
DiffEntry rename = entries.get(0);
assertNotNull(rename);
assertTrue(foo.equals(rename.newId.toObjectId()));
assertTrue(foo.equals(rename.oldId.toObjectId()));
assertEquals(FileMode.REGULAR_FILE, rename.newMode);
assertEquals(FileMode.REGULAR_FILE, rename.oldMode);
assertEquals(ChangeType.RENAME, rename.changeType);
assertEquals("some/file.c", rename.newName);
assertEquals("some/other_file.c", rename.oldName);
DiffEntry modify = entries.get(1);
assertEquals(c, modify);
}
public void testGetEntriesMultipleRenames() throws Exception {
ObjectId foo = testDb.blob("foo").copy();
ObjectId bar = testDb.blob("bar").copy();
DiffEntry a = new DiffEntry();
a.newId = AbbreviatedObjectId.fromObjectId(foo);
a.newMode = FileMode.REGULAR_FILE;
a.newName = "some/file.c";
a.changeType = ChangeType.ADD;
DiffEntry b = new DiffEntry();
b.oldId = AbbreviatedObjectId.fromObjectId(foo);
b.oldMode = FileMode.REGULAR_FILE;
b.oldName = "some/other_file.c";
b.changeType = ChangeType.DELETE;
DiffEntry c = new DiffEntry();
c.newId = AbbreviatedObjectId.fromObjectId(bar);
c.newMode = FileMode.REGULAR_FILE;
c.newName = "README";
c.changeType = ChangeType.ADD;
DiffEntry d = new DiffEntry();
d.oldId = AbbreviatedObjectId.fromObjectId(bar);
d.oldMode = FileMode.REGULAR_FILE;
d.oldName = "REEDME";
d.changeType = ChangeType.DELETE;
rd.addDiffEntry(a);
rd.addDiffEntry(b);
rd.addDiffEntry(c);
rd.addDiffEntry(d);
List<DiffEntry> entries = rd.getEntries();
assertEquals(2, entries.size());
// The REEDME -> README renamed change should be first because the
// output should be sorted by newName
DiffEntry readme = entries.get(0);
assertNotNull(readme);
assertTrue(bar.equals(readme.newId.toObjectId()));
assertTrue(bar.equals(readme.oldId.toObjectId()));
assertEquals(FileMode.REGULAR_FILE, readme.newMode);
assertEquals(FileMode.REGULAR_FILE, readme.oldMode);
assertEquals(ChangeType.RENAME, readme.changeType);
assertEquals("README", readme.newName);
assertEquals("REEDME", readme.oldName);
DiffEntry somefile = entries.get(1);
assertNotNull(somefile);
assertTrue(foo.equals(somefile.newId.toObjectId()));
assertTrue(foo.equals(somefile.oldId.toObjectId()));
assertEquals(FileMode.REGULAR_FILE, somefile.newMode);
assertEquals(FileMode.REGULAR_FILE, somefile.oldMode);
assertEquals(ChangeType.RENAME, somefile.changeType);
assertEquals("some/file.c", somefile.newName);
assertEquals("some/other_file.c", somefile.oldName);
}
}

View File

@ -297,6 +297,7 @@ remoteDoesNotHaveSpec=Remote does not have {0} available for fetch.
remoteDoesNotSupportSmartHTTPPush=remote does not support smart HTTP push
remoteHungUpUnexpectedly=remote hung up unexpectedly
remoteNameCantBeNull=Remote name can't be null.
renamesAlreadyFound=Renames have already been found.
repositoryAlreadyExists=Repository already exists: {0}
repositoryNotFound=repository not found: {0}
requiredHashFunctionNotAvailable=Required hash function {0} not available.

View File

@ -356,6 +356,7 @@ public static JGitText get() {
/***/ public String remoteDoesNotSupportSmartHTTPPush;
/***/ public String remoteHungUpUnexpectedly;
/***/ public String remoteNameCantBeNull;
/***/ public String renamesAlreadyFound;
/***/ public String repositoryAlreadyExists;
/***/ public String repositoryNotFound;
/***/ public String requiredHashFunctionNotAvailable;

View File

@ -0,0 +1,258 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.treewalk.TreeWalk;
/**
 * Detect and resolve object renames.
 * <p>
 * Changes are collected with {@link #addTreeWalk(TreeWalk)} and/or
 * {@link #addDiffEntry(DiffEntry)}. The first call to {@link #getEntries()}
 * pairs each added file with a deleted file that has the same object id,
 * replacing the pair with a single RENAME entry. After that call no further
 * changes may be added.
 */
public class RenameDetector {
	/** Score recorded on a rename whose old and new object ids are equal. */
	private static final int EXACT_RENAME_SCORE = 100;

	/**
	 * Orders entries by path name.
	 * <p>
	 * Deletions are ordered by their old name, everything else by the new
	 * name. A DELETE entry handed to {@link #addDiffEntry(DiffEntry)} may
	 * only have its old side populated, in which case comparing on newName
	 * would fail with a NullPointerException.
	 */
	private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
		public int compare(DiffEntry o1, DiffEntry o2) {
			return nameOf(o1).compareTo(nameOf(o2));
		}

		private String nameOf(DiffEntry ent) {
			if (ent.changeType == ChangeType.DELETE)
				return ent.oldName;
			return ent.newName;
		}
	};

	/** Entries that are final: modifications, plus renames once resolved. */
	private final List<DiffEntry> entries = new ArrayList<DiffEntry>();

	/** Deletions not yet matched to an addition; null once resolved. */
	private List<DiffEntry> deleted = new ArrayList<DiffEntry>();

	/** Additions not yet matched to a deletion; null once resolved. */
	private List<DiffEntry> added = new ArrayList<DiffEntry>();

	/** Set by the first {@link #getEntries()} call. */
	private boolean done = false;

	/**
	 * Walk through a given tree walk with exactly two trees and add all
	 * differing files to the list of object to run rename detection on.
	 * <p>
	 * The tree walk must have two trees attached to it, as well as a filter.
	 * Every path the walk yields is recorded, so the filter is presumably
	 * expected to pass only paths that differ between the two trees. Calling
	 * this method after calling {@link #getEntries()} will result in an
	 * {@link IllegalStateException}.
	 *
	 * @param walk
	 *            the TreeWalk to walk through. Must have exactly two trees.
	 * @throws IllegalStateException
	 *             the {@link #getEntries()} method has already been called for
	 *             this instance.
	 * @throws MissingObjectException
	 *             {@link TreeWalk#isRecursive()} was enabled on the tree, a
	 *             subtree was found, but the subtree object does not exist in
	 *             this repository. The repository may be missing objects.
	 * @throws IncorrectObjectTypeException
	 *             {@link TreeWalk#isRecursive()} was enabled on the tree, a
	 *             subtree was found, and the subtree id does not denote a tree,
	 *             but instead names some other non-tree type of object. The
	 *             repository may have data corruption.
	 * @throws CorruptObjectException
	 *             the contents of a tree did not appear to be a tree. The
	 *             repository may have data corruption.
	 * @throws IOException
	 *             a loose object or pack file could not be read.
	 */
	public void addTreeWalk(TreeWalk walk) throws MissingObjectException,
			IncorrectObjectTypeException, CorruptObjectException, IOException {
		if (done)
			throw new IllegalStateException(JGitText.get().renamesAlreadyFound);

		// Reused for every id lookup; fromObjectId() copies the value out.
		MutableObjectId idBuf = new MutableObjectId();
		while (walk.next()) {
			DiffEntry entry = new DiffEntry();

			walk.getObjectId(idBuf, 0);
			entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf);
			walk.getObjectId(idBuf, 1);
			entry.newId = AbbreviatedObjectId.fromObjectId(idBuf);

			entry.oldMode = walk.getFileMode(0);
			entry.newMode = walk.getFileMode(1);
			entry.newName = entry.oldName = walk.getPathString();

			// A side whose mode is MISSING does not have the path at all.
			if (entry.oldMode == FileMode.MISSING) {
				entry.changeType = ChangeType.ADD;
				added.add(entry);
			} else if (entry.newMode == FileMode.MISSING) {
				entry.changeType = ChangeType.DELETE;
				deleted.add(entry);
			} else {
				entry.changeType = ChangeType.MODIFY;
				entries.add(entry);
			}
		}
	}

	/**
	 * Add a DiffEntry to the list of items to run rename detection on. Calling
	 * this method after calling {@link #getEntries()} will result in an
	 * {@link IllegalStateException}.
	 * <p>
	 * Only ADD and DELETE entries take part in rename detection; entries of
	 * any other change type are passed through to the result unchanged.
	 *
	 * @param entry
	 *            the {@link DiffEntry} to add
	 *
	 * @throws IllegalStateException
	 *             the {@link #getEntries()} method has already been called for
	 *             this instance
	 */
	public void addDiffEntry(DiffEntry entry) {
		if (done)
			throw new IllegalStateException(JGitText.get().renamesAlreadyFound);

		switch (entry.changeType) {
		case ADD:
			added.add(entry);
			break;
		case DELETE:
			deleted.add(entry);
			break;
		case COPY:
		case MODIFY:
		case RENAME:
		default:
			entries.add(entry);
		}
	}

	/**
	 * Determines which files, if any, are renames, and returns an unmodifiable
	 * list of {@link DiffEntry}s representing all files that have been changed
	 * in some way. The list is sorted by path name: deletions by their old
	 * name, all other entries by their new name.
	 * <p>
	 * Rename detection runs on the first call only; later calls return a view
	 * of the same result.
	 *
	 * @return an unmodifiable list of {@link DiffEntry}s representing all files
	 *         that have been changed
	 * @throws IOException
	 *             declared by the signature; the exact-id matching done here
	 *             does not itself read any objects.
	 */
	public List<DiffEntry> getEntries() throws IOException {
		if (!done) {
			done = true;
			findExactRenames();

			// Whatever could not be paired stays a plain add or delete.
			entries.addAll(added);
			entries.addAll(deleted);
			added = null;
			deleted = null;

			Collections.sort(entries, DIFF_COMPARATOR);
		}
		return Collections.unmodifiableList(entries);
	}

	/**
	 * Pair added and deleted entries that share the same object id.
	 * <p>
	 * Each pair is appended to {@link #entries} as a RENAME; {@link #added}
	 * and {@link #deleted} are replaced by lists holding only the entries that
	 * found no partner.
	 */
	// The map values are either a DiffEntry or a List<DiffEntry>; the casts
	// below are guarded by instanceof checks on that invariant.
	@SuppressWarnings("unchecked")
	private void findExactRenames() {
		// Index deletions by old id. A single deletion is stored directly;
		// a list is only allocated when several deletions share one id.
		HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
		for (DiffEntry del : deleted) {
			Object old = map.put(del.oldId, del);
			if (old != null) {
				if (old instanceof DiffEntry) {
					ArrayList<DiffEntry> tmp = new ArrayList<DiffEntry>(2);
					tmp.add((DiffEntry) old);
					tmp.add(del);
					map.put(del.oldId, tmp);
				} else {
					// Must be a list of DiffEntrys. The put() above
					// displaced it, so extend it and store it back.
					((List<DiffEntry>) old).add(del);
					map.put(del.oldId, old);
				}
			}
		}

		// Each addition consumes at most one deletion with the same id.
		ArrayList<DiffEntry> tempAdded = new ArrayList<DiffEntry>(added.size());
		for (DiffEntry add : added) {
			Object del = map.remove(add.newId);
			if (del != null) {
				if (del instanceof DiffEntry) {
					entries.add(resolveRename(add, (DiffEntry) del,
							EXACT_RENAME_SCORE));
				} else {
					// Must be a list of DiffEntrys
					List<DiffEntry> tmp = (List<DiffEntry>) del;
					entries.add(resolveRename(add, tmp.remove(0),
							EXACT_RENAME_SCORE));
					// Keep the remaining candidates for later additions.
					if (!tmp.isEmpty())
						map.put(add.newId, del);
				}
			} else {
				tempAdded.add(add);
			}
		}
		added = tempAdded;

		// Anything still in the map is a deletion that was never matched.
		Collection<Object> values = map.values();
		ArrayList<DiffEntry> tempDeleted = new ArrayList<DiffEntry>(values
				.size());
		for (Object o : values) {
			if (o instanceof DiffEntry)
				tempDeleted.add((DiffEntry) o);
			else
				tempDeleted.addAll((List<DiffEntry>) o);
		}
		deleted = tempDeleted;
	}

	/**
	 * Combine an addition and a deletion into one RENAME entry.
	 *
	 * @param add
	 *            supplies the new id, mode and name.
	 * @param del
	 *            supplies the old id, mode and name.
	 * @param score
	 *            value stored in the result's score field.
	 * @return a new entry; neither argument is modified.
	 */
	private DiffEntry resolveRename(DiffEntry add, DiffEntry del, int score) {
		DiffEntry renamed = new DiffEntry();

		renamed.oldId = del.oldId;
		renamed.oldMode = del.oldMode;
		renamed.oldName = del.oldName;
		renamed.newId = add.newId;
		renamed.newMode = add.newMode;
		renamed.newName = add.newName;
		renamed.changeType = ChangeType.RENAME;
		renamed.score = score;

		return renamed;
	}
}

View File

@ -82,6 +82,21 @@ public static final AbbreviatedObjectId fromString(final byte[] buf,
return fromHexString(buf, offset, end);
}
/**
 * Create an AbbreviatedObjectId that carries every bit of an existing id.
 * <p>
 * The result is built with the full hex-string length and all five words
 * of the source id, so it is a complete id (see {@link #isComplete()}).
 *
 * @param id
 *            the {@link AnyObjectId} whose value is copied.
 * @return a new abbreviated id holding the same value as {@code id}.
 */
public static final AbbreviatedObjectId fromObjectId(AnyObjectId id) {
	final int fullLength = Constants.OBJECT_ID_STRING_LENGTH;
	return new AbbreviatedObjectId(fullLength, //
			id.w1, id.w2, id.w3, id.w4, id.w5);
}
/**
* Convert an AbbreviatedObjectId from hex characters.
*