From cb8e1e6014c9cbac9c557df519ad7e22bdcf7d7d Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Thu, 1 Jul 2010 15:30:46 -0700 Subject: [PATCH] Added a preliminary version of rename detection JGit does not currently do rename detection during diffs. I added a class that, given a TreeWalk to iterate over, can output a list of DiffEntry's for that TreeWalk, taking into account renames. This class only detects renames by SHA1's. More complex rename detection, along the lines of what C Git does will be added later. Change-Id: I93606ce15da70df6660651ec322ea50718dd7c04 --- .../eclipse/jgit/diff/RenameDetectorTest.java | 204 ++++++++++++++ .../org/eclipse/jgit/JGitText.properties | 1 + .../src/org/eclipse/jgit/JGitText.java | 1 + .../org/eclipse/jgit/diff/RenameDetector.java | 258 ++++++++++++++++++ .../eclipse/jgit/lib/AbbreviatedObjectId.java | 15 + 5 files changed, 479 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java new file mode 100644 index 000000000..4f84066f2 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.diff; + +import java.util.List; + +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.RepositoryTestCase; + /** Tests for {@link RenameDetector}: an ADD and a DELETE that share one blob id must be reported as a single RENAME. */ +public class RenameDetectorTest extends RepositoryTestCase { + /** Detector under test; assigned in {@link #setUp()}. */ + RenameDetector rd; + /** Helper used by the tests to create blobs and obtain their ids. */ + TestRepository testDb; + /** Wraps {@code db} in a TestRepository and creates a new RenameDetector. */ + @Override + public void setUp() throws Exception { + super.setUp(); + testDb = new TestRepository(db); + rd = new RenameDetector(); + } + /** One ADD and one DELETE of the same blob: expects exactly one RENAME entry carrying the old name of the delete and the new name of the add. */ + public void testGetEntriesAddDelete() throws Exception { + ObjectId foo = testDb.blob("foo").copy(); + + DiffEntry a = new DiffEntry(); + a.newId = AbbreviatedObjectId.fromObjectId(foo); + a.newMode = FileMode.REGULAR_FILE; + a.newName = "some/file.c"; + a.changeType = ChangeType.ADD; + + DiffEntry b = new DiffEntry(); + b.oldId = AbbreviatedObjectId.fromObjectId(foo); + b.oldMode = FileMode.REGULAR_FILE; + b.oldName = "some/other_file.c"; + b.changeType = ChangeType.DELETE; + + rd.addDiffEntry(a); + rd.addDiffEntry(b); + + List entries = rd.getEntries(); + assertEquals(1, entries.size()); + + DiffEntry rename = entries.get(0); + assertNotNull(rename); + assertTrue(foo.equals(rename.newId.toObjectId())); + assertTrue(foo.equals(rename.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, rename.newMode); + assertEquals(FileMode.REGULAR_FILE, rename.oldMode); + assertEquals(ChangeType.RENAME, rename.changeType); + assertEquals("some/file.c", rename.newName); + assertEquals("some/other_file.c", rename.oldName); + } + /** Same ADD/DELETE pair plus an unrelated MODIFY entry: expects the RENAME followed by an entry equal to the MODIFY that was added. */ + public void testGetEntriesAddDeleteModify() throws Exception { + ObjectId foo = testDb.blob("foo").copy(); + ObjectId bar = testDb.blob("bar").copy(); + + DiffEntry a = new DiffEntry(); + a.newId = AbbreviatedObjectId.fromObjectId(foo); + a.newMode = FileMode.REGULAR_FILE; + a.newName = "some/file.c"; + a.changeType = ChangeType.ADD; + + DiffEntry b = new
DiffEntry(); + b.oldId = AbbreviatedObjectId.fromObjectId(foo); + b.oldMode = FileMode.REGULAR_FILE; + b.oldName = "some/other_file.c"; + b.changeType = ChangeType.DELETE; + + DiffEntry c = new DiffEntry(); + c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar); + c.newMode = c.oldMode = FileMode.REGULAR_FILE; + c.newName = c.oldName = "some/header.h"; + c.changeType = ChangeType.MODIFY; + + rd.addDiffEntry(a); + rd.addDiffEntry(b); + rd.addDiffEntry(c); + + List entries = rd.getEntries(); + assertEquals(2, entries.size()); + + // The rename comes first: the output is sorted by newName and + // "some/file.c" sorts before "some/header.h" + DiffEntry rename = entries.get(0); + assertNotNull(rename); + assertTrue(foo.equals(rename.newId.toObjectId())); + assertTrue(foo.equals(rename.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, rename.newMode); + assertEquals(FileMode.REGULAR_FILE, rename.oldMode); + assertEquals(ChangeType.RENAME, rename.changeType); + assertEquals("some/file.c", rename.newName); + assertEquals("some/other_file.c", rename.oldName); + + DiffEntry modify = entries.get(1); + assertEquals(c, modify); + } + /** Two independent ADD/DELETE pairs over two different blobs: expects two RENAME entries, ordered by new name. */ + public void testGetEntriesMultipleRenames() throws Exception { + ObjectId foo = testDb.blob("foo").copy(); + ObjectId bar = testDb.blob("bar").copy(); + + DiffEntry a = new DiffEntry(); + a.newId = AbbreviatedObjectId.fromObjectId(foo); + a.newMode = FileMode.REGULAR_FILE; + a.newName = "some/file.c"; + a.changeType = ChangeType.ADD; + + DiffEntry b = new DiffEntry(); + b.oldId = AbbreviatedObjectId.fromObjectId(foo); + b.oldMode = FileMode.REGULAR_FILE; + b.oldName = "some/other_file.c"; + b.changeType = ChangeType.DELETE; + + DiffEntry c = new DiffEntry(); + c.newId = AbbreviatedObjectId.fromObjectId(bar); + c.newMode = FileMode.REGULAR_FILE; + c.newName = "README"; + c.changeType = ChangeType.ADD; + + DiffEntry d = new DiffEntry(); + d.oldId = AbbreviatedObjectId.fromObjectId(bar); + d.oldMode = FileMode.REGULAR_FILE; + d.oldName = "REEDME"; +
d.changeType = ChangeType.DELETE; + + rd.addDiffEntry(a); + rd.addDiffEntry(b); + rd.addDiffEntry(c); + rd.addDiffEntry(d); + + List entries = rd.getEntries(); + assertEquals(2, entries.size()); + + // The REEDME -> README rename comes first because the output is + // sorted by newName ("README" sorts before "some/file.c") + DiffEntry readme = entries.get(0); + assertNotNull(readme); + assertTrue(bar.equals(readme.newId.toObjectId())); + assertTrue(bar.equals(readme.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, readme.newMode); + assertEquals(FileMode.REGULAR_FILE, readme.oldMode); + assertEquals(ChangeType.RENAME, readme.changeType); + assertEquals("README", readme.newName); + assertEquals("REEDME", readme.oldName); + + DiffEntry somefile = entries.get(1); + assertNotNull(somefile); + assertTrue(foo.equals(somefile.newId.toObjectId())); + assertTrue(foo.equals(somefile.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, somefile.newMode); + assertEquals(FileMode.REGULAR_FILE, somefile.oldMode); + assertEquals(ChangeType.RENAME, somefile.changeType); + assertEquals("some/file.c", somefile.newName); + assertEquals("some/other_file.c", somefile.oldName); + } + +} diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index 91b67daf8..76097cb5f 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -297,6 +297,7 @@ remoteDoesNotHaveSpec=Remote does not have {0} available for fetch. remoteDoesNotSupportSmartHTTPPush=remote does not support smart HTTP push remoteHungUpUnexpectedly=remote hung up unexpectedly remoteNameCantBeNull=Remote name can't be null. +renamesAlreadyFound=Renames have already been found. repositoryAlreadyExists=Repository already exists: {0} repositoryNotFound=repository not found: {0} requiredHashFunctionNotAvailable=Required hash function {0} not available.
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index 0c64b9edd..a2e1cd109 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -356,6 +356,7 @@ public static JGitText get() { /***/ public String remoteDoesNotSupportSmartHTTPPush; /***/ public String remoteHungUpUnexpectedly; /***/ public String remoteNameCantBeNull; + /***/ public String renamesAlreadyFound; /***/ public String repositoryAlreadyExists; /***/ public String repositoryNotFound; /***/ public String requiredHashFunctionNotAvailable; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java new file mode 100644 index 000000000..50d81c966 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. 
nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.treewalk.TreeWalk; + +/** Detect and resolve object renames. 
*/ +public class RenameDetector { + /** Score stored on a rename whose added and deleted entries were matched by object id. */ + private static final int EXACT_RENAME_SCORE = 100; + /** Orders entries by newName. NOTE(review): a null newName on either entry makes the comparison throw NullPointerException - confirm that every entry handed to addDiffEntry (DELETE entries in particular) has newName set. */ + private static final Comparator DIFF_COMPARATOR = new Comparator() { + public int compare(DiffEntry o1, DiffEntry o2) { + return o1.newName.compareTo(o2.newName); + } + }; + /** Entries that are not rename candidates; getEntries() appends the detected renames and the unmatched adds and deletes, then sorts the list. */ + private final List entries = new ArrayList(); + /** DELETE entries waiting for a matching ADD; set to null once getEntries() has run. */ + private List deleted = new ArrayList(); + /** ADD entries waiting for a matching DELETE; set to null once getEntries() has run. */ + private List added = new ArrayList(); + /** True once getEntries() has been called; the add methods then throw IllegalStateException. */ + private boolean done = false; + + /** + * Walk through a given tree walk with exactly two trees and add all + * differing files to the list of objects to run rename detection on. + *
<p>
+ * The tree walk must have two trees attached to it, as well as a filter. + * Calling this method after calling {@link #getEntries()} will result in an + * {@link IllegalStateException}. + * Tree 0 is read as the old side and tree 1 as the new side: a path whose mode is MISSING in tree 0 becomes an ADD, one MISSING in tree 1 a DELETE, anything else a MODIFY. + * @param walk + * the TreeWalk to walk through. Must have exactly two trees. + * @throws IllegalStateException + * the {@link #getEntries()} method has already been called for + * this instance. + * @throws MissingObjectException + * {@link TreeWalk#isRecursive()} was enabled on the tree, a + * subtree was found, but the subtree object does not exist in + * this repository. The repository may be missing objects. + * @throws IncorrectObjectTypeException + * {@link TreeWalk#isRecursive()} was enabled on the tree, a + * subtree was found, and the subtree id does not denote a tree, + * but instead names some other non-tree type of object. The + * repository may have data corruption. + * @throws CorruptObjectException + * the contents of a tree did not appear to be a tree. The + * repository may have data corruption. + * @throws IOException + * a loose object or pack file could not be read.
+ */ + public void addTreeWalk(TreeWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, CorruptObjectException, IOException { + if (done) + throw new IllegalStateException(JGitText.get().renamesAlreadyFound); + MutableObjectId idBuf = new MutableObjectId(); /* one buffer reused for every id read; fromObjectId is called right after each read to capture the value */ + while (walk.next()) { + DiffEntry entry = new DiffEntry(); + walk.getObjectId(idBuf, 0); + entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf); + walk.getObjectId(idBuf, 1); + entry.newId = AbbreviatedObjectId.fromObjectId(idBuf); + entry.oldMode = walk.getFileMode(0); + entry.newMode = walk.getFileMode(1); + entry.newName = entry.oldName = walk.getPathString(); + if (entry.oldMode == FileMode.MISSING) { + entry.changeType = ChangeType.ADD; + added.add(entry); + } else if (entry.newMode == FileMode.MISSING) { + entry.changeType = ChangeType.DELETE; + deleted.add(entry); + } else { + entry.changeType = ChangeType.MODIFY; + entries.add(entry); + } + } + } + + /** + * Add a DiffEntry to the list of items to run rename detection on. Calling + * this method after calling {@link #getEntries()} will result in an + * {@link IllegalStateException}. + * ADD and DELETE entries become rename candidates; an entry of any other change type is kept unchanged. + * @param entry + * the {@link DiffEntry} to add + * + * @throws IllegalStateException + * the {@link #getEntries()} method has already been called for + * this instance + */ + public void addDiffEntry(DiffEntry entry) { + if (done) + throw new IllegalStateException(JGitText.get().renamesAlreadyFound); + switch (entry.changeType) { + case ADD: + added.add(entry); + break; + case DELETE: + deleted.add(entry); + break; + case COPY: + case MODIFY: + case RENAME: + default: + entries.add(entry); + } + } + + /** + * Determines which files, if any, are renames, and returns an unmodifiable + * list of {@link DiffEntry}s representing all files that have been changed + * in some way.
The list is sorted by newName. Detection runs on the + * first call only; later calls return an unmodifiable view of the same result. + * @return an unmodifiable list of {@link DiffEntry}s representing all files + * that have been changed + * @throws IOException declared, but nothing in the exact-match detection performed here throws it + */ + public List getEntries() throws IOException { + if (!done) { + done = true; + findExactRenames(); + entries.addAll(added); + entries.addAll(deleted); + added = null; + deleted = null; + Collections.sort(entries, DIFF_COMPARATOR); + } + return Collections.unmodifiableList(entries); + } + /** Matches added entries against deleted entries through a HashMap keyed by the deleted entry's oldId and looked up with the added entry's newId. Each match is appended to entries as a RENAME; added and deleted are then reassigned to hold only the unmatched entries. A map value is a single DiffEntry, or a List of them when several deletions share one id. */ + @SuppressWarnings("unchecked") + private void findExactRenames() { + HashMap map = new HashMap(); + + for (DiffEntry del : deleted) { + Object old = map.put(del.oldId, del); + if (old != null) { + if (old instanceof DiffEntry) { + ArrayList tmp = new ArrayList(2); + tmp.add((DiffEntry) old); + tmp.add(del); + map.put(del.oldId, tmp); + } else { + // Must be a list of DiffEntrys; the put() above displaced it, so it is stored back after appending + ((List) old).add(del); + map.put(del.oldId, old); + } + } + } + + ArrayList tempAdded = new ArrayList(added.size()); + + for (DiffEntry add : added) { + Object del = map.remove(add.newId); + if (del != null) { + if (del instanceof DiffEntry) { + entries.add(resolveRename(add, (DiffEntry) del, + EXACT_RENAME_SCORE)); + } else { + // Must be a list of DiffEntrys; use its first element and keep the rest available for later adds + List tmp = (List) del; + entries.add(resolveRename(add, tmp.remove(0), + EXACT_RENAME_SCORE)); + if (!tmp.isEmpty()) + map.put(add.newId, del); + } + } else { + tempAdded.add(add); + } + } + added = tempAdded; + + Collection values = map.values(); + ArrayList tempDeleted = new ArrayList(values + .size()); + for (Object o : values) { + if (o instanceof DiffEntry) + tempDeleted.add((DiffEntry) o); + else + tempDeleted.addAll((List) o); + } + deleted = tempDeleted; + } + /** Builds a new RENAME entry: old id, mode and name are taken from del, new id, mode and name from add, and score is stored as given. Neither argument is modified. */ + private DiffEntry resolveRename(DiffEntry add, DiffEntry del, int score) { + DiffEntry renamed = new DiffEntry(); + + renamed.oldId = del.oldId; + renamed.oldMode = del.oldMode; + renamed.oldName = del.oldName; + renamed.newId = add.newId; + renamed.newMode = add.newMode; + renamed.newName =
add.newName; + renamed.changeType = ChangeType.RENAME; + renamed.score = score; + + return renamed; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java index a150e8fea..3f188fe0e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java @@ -82,6 +82,21 @@ public static final AbbreviatedObjectId fromString(final byte[] buf, return fromHexString(buf, offset, end); } + /** + * Convert an AbbreviatedObjectId from an {@link AnyObjectId}. + *
<p>
+ * This method copies over all bits of the Id, and the result is therefore complete + * (see {@link #isComplete()}). + * + * @param id + * the {@link AnyObjectId} to convert from. + * @return a new abbreviated id built from the five words of {@code id} at full length. + */ + public static final AbbreviatedObjectId fromObjectId(AnyObjectId id) { + return new AbbreviatedObjectId(Constants.OBJECT_ID_STRING_LENGTH, + id.w1, id.w2, id.w3, id.w4, id.w5); + } + /** * Convert an AbbreviatedObjectId from hex characters. *