From 7b0b4110ed4f63efed25af2f03bf01d2715c5fbf Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Wed, 30 Jun 2010 16:21:49 -0700 Subject: [PATCH 01/17] Refactored code out of FileHeader to facilitate rename detection Refactored a superclass out of FileHeader called DiffEntry that holds the more general data from FileHeader that is useful in rename detection (old/new Ids, modes, names, as well as changeType and score). FileHeader is now a DiffEntry that adds Hunks, parsing abilities, etc. Change-Id: I8398728cd218f8c6e98f7a4a7f2f342391d865e4 --- .../src/org/eclipse/jgit/diff/DiffEntry.java | 174 ++++++++++++++++++ .../org/eclipse/jgit/patch/FileHeader.java | 125 +------------ 2 files changed, 176 insertions(+), 123 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java new file mode 100644 index 000000000..ac3ee9864 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2008-2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; + +/** A value class representing a change to a file */ +public class DiffEntry { + + /** General type of change a single file-level patch describes. */ + public static enum ChangeType { + /** Add a new file to the project */ + ADD, + + /** Modify an existing file in the project (content and/or mode) */ + MODIFY, + + /** Delete an existing file from the project */ + DELETE, + + /** Rename an existing file to a new location */ + RENAME, + + /** Copy an existing file to a new location, keeping the original */ + COPY; + } + + /** File name of the old (pre-image). */ + protected String oldName; + + /** File name of the new (post-image). */ + protected String newName; + + /** Old mode of the file, if described by the patch, else null. */ + protected FileMode oldMode; + + /** New mode of the file, if described by the patch, else null. */ + protected FileMode newMode; + + /** General type of change indicated by the patch. */ + protected ChangeType changeType; + + /** Similarity score if {@link #changeType} is a copy or rename. */ + protected int score; + + /** ObjectId listed on the index line for the old (pre-image) */ + protected AbbreviatedObjectId oldId; + + /** ObjectId listed on the index line for the new (post-image) */ + protected AbbreviatedObjectId newId; + + /** + * Get the old name associated with this file. + *

+ * The meaning of the old name can differ depending on the semantic meaning + * of this patch: + *

+ * + * @return old name for this file. + */ + public String getOldName() { + return oldName; + } + + /** + * Get the new name associated with this file. + *

+ * The meaning of the new name can differ depending on the semantic meaning + * of this patch: + *

+ * + * @return new name for this file. + */ + public String getNewName() { + return newName; + } + + /** @return the old file mode, if described in the patch */ + public FileMode getOldMode() { + return oldMode; + } + + /** @return the new file mode, if described in the patch */ + public FileMode getNewMode() { + return newMode; + } + + /** @return the type of change this patch makes on {@link #getNewName()} */ + public ChangeType getChangeType() { + return changeType; + } + + /** + * @return similarity score between {@link #getOldName()} and + * {@link #getNewName()} if {@link #getChangeType()} is + * {@link ChangeType#COPY} or {@link ChangeType#RENAME}. + */ + public int getScore() { + return score; + } + + /** + * Get the old object id from the index. + * + * @return the object id; null if there is no index line + */ + public AbbreviatedObjectId getOldId() { + return oldId; + } + + /** + * Get the new object id from the index. + * + * @return the object id; null if there is no index line + */ + public AbbreviatedObjectId getNewId() { + return newId; + } + +} \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java index 25dc72af2..35c2ee303 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java @@ -60,6 +60,7 @@ import java.util.List; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.EditList; import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.Constants; @@ -69,7 +70,7 @@ import org.eclipse.jgit.util.TemporaryBuffer; /** Patch header describing an action for a single file path. */ -public class FileHeader { +public class FileHeader extends DiffEntry { /** Magical file name used for file adds or deletes. */ public static final String DEV_NULL = "/dev/null"; @@ -103,24 +104,6 @@ public class FileHeader { static final byte[] NEW_NAME = encodeASCII("+++ "); - /** General type of change a single file-level patch describes. */ - public static enum ChangeType { - /** Add a new file to the project */ - ADD, - - /** Modify an existing file in the project (content and/or mode) */ - MODIFY, - - /** Delete an existing file from the project */ - DELETE, - - /** Rename an existing file to a new location */ - RENAME, - - /** Copy an existing file to a new location, keeping the original */ - COPY; - } - /** Type of patch used by this file. */ public static enum PatchType { /** A traditional unified diff style patch of a text file. */ @@ -142,30 +125,6 @@ public static enum PatchType { /** Position 1 past the end of this file within {@link #buf}. */ int endOffset; - /** File name of the old (pre-image). */ - private String oldName; - - /** File name of the new (post-image). */ - private String newName; - - /** Old mode of the file, if described by the patch, else null. */ - private FileMode oldMode; - - /** New mode of the file, if described by the patch, else null. */ - protected FileMode newMode; - - /** General type of change indicated by the patch. */ - protected ChangeType changeType; - - /** Similarity score if {@link #changeType} is a copy or rename. */ - private int score; - - /** ObjectId listed on the index line for the old (pre-image) */ - private AbbreviatedObjectId oldId; - - /** ObjectId listed on the index line for the new (post-image) */ - protected AbbreviatedObjectId newId; - /** Type of patch used to modify this file */ PatchType patchType; @@ -312,86 +271,6 @@ private String[] extractFileLines(final Charset[] csGuess) { } } - /** - * Get the old name associated with this file. - *

- * The meaning of the old name can differ depending on the semantic meaning - * of this patch: - *

- * - * @return old name for this file. - */ - public String getOldName() { - return oldName; - } - - /** - * Get the new name associated with this file. - *

- * The meaning of the new name can differ depending on the semantic meaning - * of this patch: - *

- * - * @return new name for this file. - */ - public String getNewName() { - return newName; - } - - /** @return the old file mode, if described in the patch */ - public FileMode getOldMode() { - return oldMode; - } - - /** @return the new file mode, if described in the patch */ - public FileMode getNewMode() { - return newMode; - } - - /** @return the type of change this patch makes on {@link #getNewName()} */ - public ChangeType getChangeType() { - return changeType; - } - - /** - * @return similarity score between {@link #getOldName()} and - * {@link #getNewName()} if {@link #getChangeType()} is - * {@link ChangeType#COPY} or {@link ChangeType#RENAME}. - */ - public int getScore() { - return score; - } - - /** - * Get the old object id from the index. - * - * @return the object id; null if there is no index line - */ - public AbbreviatedObjectId getOldId() { - return oldId; - } - - /** - * Get the new object id from the index. - * - * @return the object id; null if there is no index line - */ - public AbbreviatedObjectId getNewId() { - return newId; - } - /** @return style of patch used to modify this file */ public PatchType getPatchType() { return patchType; From cb8e1e6014c9cbac9c557df519ad7e22bdcf7d7d Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Thu, 1 Jul 2010 15:30:46 -0700 Subject: [PATCH 02/17] Added a preliminary version of rename detection JGit does not currently do rename detection during diffs. I added a class that, given a TreeWalk to iterate over, can output a list of DiffEntry's for that TreeWalk, taking into account renames. This class only detects renames by SHA1's. More complex rename detection, along the lines of what C Git does will be added later. Change-Id: I93606ce15da70df6660651ec322ea50718dd7c04 --- .../eclipse/jgit/diff/RenameDetectorTest.java | 204 ++++++++++++++ .../org/eclipse/jgit/JGitText.properties | 1 + .../src/org/eclipse/jgit/JGitText.java | 1 + .../org/eclipse/jgit/diff/RenameDetector.java | 258 ++++++++++++++++++ .../eclipse/jgit/lib/AbbreviatedObjectId.java | 15 + 5 files changed, 479 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java new file mode 100644 index 000000000..4f84066f2 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.util.List; + +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.RepositoryTestCase; + +public class RenameDetectorTest extends RepositoryTestCase { + + RenameDetector rd; + + TestRepository testDb; + + @Override + public void setUp() throws Exception { + super.setUp(); + testDb = new TestRepository(db); + rd = new RenameDetector(); + } + + public void testGetEntriesAddDelete() throws Exception { + ObjectId foo = testDb.blob("foo").copy(); + + DiffEntry a = new DiffEntry(); + a.newId = AbbreviatedObjectId.fromObjectId(foo); + a.newMode = FileMode.REGULAR_FILE; + a.newName = "some/file.c"; + a.changeType = ChangeType.ADD; + + DiffEntry b = new DiffEntry(); + b.oldId = AbbreviatedObjectId.fromObjectId(foo); + b.oldMode = FileMode.REGULAR_FILE; + b.oldName = "some/other_file.c"; + b.changeType = ChangeType.DELETE; + + rd.addDiffEntry(a); + rd.addDiffEntry(b); + + List entries = rd.getEntries(); + assertEquals(1, entries.size()); + + DiffEntry rename = entries.get(0); + assertNotNull(rename); + assertTrue(foo.equals(rename.newId.toObjectId())); + assertTrue(foo.equals(rename.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, rename.newMode); + assertEquals(FileMode.REGULAR_FILE, rename.oldMode); + assertEquals(ChangeType.RENAME, rename.changeType); + assertEquals("some/file.c", rename.newName); + assertEquals("some/other_file.c", rename.oldName); + } + + public void testGetEntriesAddDeleteModify() throws Exception { + ObjectId foo = testDb.blob("foo").copy(); + ObjectId bar = testDb.blob("bar").copy(); + + DiffEntry a = new DiffEntry(); + a.newId = AbbreviatedObjectId.fromObjectId(foo); + a.newMode = FileMode.REGULAR_FILE; + a.newName = "some/file.c"; + a.changeType = ChangeType.ADD; + + DiffEntry b = new DiffEntry(); + b.oldId = AbbreviatedObjectId.fromObjectId(foo); + b.oldMode = FileMode.REGULAR_FILE; + b.oldName = "some/other_file.c"; + b.changeType = ChangeType.DELETE; + + DiffEntry c = new DiffEntry(); + c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar); + c.newMode = c.oldMode = FileMode.REGULAR_FILE; + c.newName = c.oldName = "some/header.h"; + c.changeType = ChangeType.MODIFY; + + rd.addDiffEntry(a); + rd.addDiffEntry(b); + rd.addDiffEntry(c); + + List entries = rd.getEntries(); + assertEquals(2, entries.size()); + + // The renamed change should be first because the output should be + // sorted by newName + DiffEntry rename = entries.get(0); + assertNotNull(rename); + assertTrue(foo.equals(rename.newId.toObjectId())); + assertTrue(foo.equals(rename.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, rename.newMode); + assertEquals(FileMode.REGULAR_FILE, rename.oldMode); + assertEquals(ChangeType.RENAME, rename.changeType); + assertEquals("some/file.c", rename.newName); + assertEquals("some/other_file.c", rename.oldName); + + DiffEntry modify = entries.get(1); + assertEquals(c, modify); + } + + public void testGetEntriesMultipleRenames() throws Exception { + ObjectId foo = testDb.blob("foo").copy(); + ObjectId bar = testDb.blob("bar").copy(); + + DiffEntry a = new DiffEntry(); + a.newId = AbbreviatedObjectId.fromObjectId(foo); + a.newMode = FileMode.REGULAR_FILE; + a.newName = "some/file.c"; + a.changeType = ChangeType.ADD; + + DiffEntry b = new DiffEntry(); + b.oldId = AbbreviatedObjectId.fromObjectId(foo); + b.oldMode = FileMode.REGULAR_FILE; + b.oldName = "some/other_file.c"; + b.changeType = ChangeType.DELETE; + + DiffEntry c = new DiffEntry(); + c.newId = AbbreviatedObjectId.fromObjectId(bar); + c.newMode = FileMode.REGULAR_FILE; + c.newName = "README"; + c.changeType = ChangeType.ADD; + + DiffEntry d = new DiffEntry(); + d.oldId = AbbreviatedObjectId.fromObjectId(bar); + d.oldMode = FileMode.REGULAR_FILE; + d.oldName = "REEDME"; + d.changeType = ChangeType.DELETE; + + rd.addDiffEntry(a); + rd.addDiffEntry(b); + rd.addDiffEntry(c); + rd.addDiffEntry(d); + + List entries = rd.getEntries(); + assertEquals(2, entries.size()); + + // The REEDME -> README renamed change should be first because the + // output should be sorted by newName + DiffEntry readme = entries.get(0); + assertNotNull(readme); + assertTrue(bar.equals(readme.newId.toObjectId())); + assertTrue(bar.equals(readme.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, readme.newMode); + assertEquals(FileMode.REGULAR_FILE, readme.oldMode); + assertEquals(ChangeType.RENAME, readme.changeType); + assertEquals("README", readme.newName); + assertEquals("REEDME", readme.oldName); + + DiffEntry somefile = entries.get(1); + assertNotNull(somefile); + assertTrue(foo.equals(somefile.newId.toObjectId())); + assertTrue(foo.equals(somefile.oldId.toObjectId())); + assertEquals(FileMode.REGULAR_FILE, somefile.newMode); + assertEquals(FileMode.REGULAR_FILE, somefile.oldMode); + assertEquals(ChangeType.RENAME, somefile.changeType); + assertEquals("some/file.c", somefile.newName); + assertEquals("some/other_file.c", somefile.oldName); + } + +} diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index 91b67daf8..76097cb5f 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -297,6 +297,7 @@ remoteDoesNotHaveSpec=Remote does not have {0} available for fetch. remoteDoesNotSupportSmartHTTPPush=remote does not support smart HTTP push remoteHungUpUnexpectedly=remote hung up unexpectedly remoteNameCantBeNull=Remote name can't be null. +renamesAlreadyFound=Renames have already been found. repositoryAlreadyExists=Repository already exists: {0} repositoryNotFound=repository not found: {0} requiredHashFunctionNotAvailable=Required hash function {0} not available. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index 0c64b9edd..a2e1cd109 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -356,6 +356,7 @@ public static JGitText get() { /***/ public String remoteDoesNotSupportSmartHTTPPush; /***/ public String remoteHungUpUnexpectedly; /***/ public String remoteNameCantBeNull; + /***/ public String renamesAlreadyFound; /***/ public String repositoryAlreadyExists; /***/ public String repositoryNotFound; /***/ public String requiredHashFunctionNotAvailable; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java new file mode 100644 index 000000000..50d81c966 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.treewalk.TreeWalk; + +/** Detect and resolve object renames. */ +public class RenameDetector { + + private static final int EXACT_RENAME_SCORE = 100; + + private static final Comparator DIFF_COMPARATOR = new Comparator() { + public int compare(DiffEntry o1, DiffEntry o2) { + return o1.newName.compareTo(o2.newName); + } + }; + + private final List entries = new ArrayList(); + + private List deleted = new ArrayList(); + + private List added = new ArrayList(); + + private boolean done = false; + + /** + * Walk through a given tree walk with exactly two trees and add all + * differing files to the list of object to run rename detection on. + *

+ * The tree walk must have two trees attached to it, as well as a filter. + * Calling this method after calling {@link #getEntries()} will result in an + * {@link IllegalStateException}. + * + * @param walk + * the TreeWalk to walk through. Must have exactly two trees. + * @throws IllegalStateException + * the {@link #getEntries()} method has already been called for + * this instance. + * @throws MissingObjectException + * {@link TreeWalk#isRecursive()} was enabled on the tree, a + * subtree was found, but the subtree object does not exist in + * this repository. The repository may be missing objects. + * @throws IncorrectObjectTypeException + * {@link TreeWalk#isRecursive()} was enabled on the tree, a + * subtree was found, and the subtree id does not denote a tree, + * but instead names some other non-tree type of object. The + * repository may have data corruption. + * @throws CorruptObjectException + * the contents of a tree did not appear to be a tree. The + * repository may have data corruption. + * @throws IOException + * a loose object or pack file could not be read. + */ + public void addTreeWalk(TreeWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, CorruptObjectException, IOException { + if (done) + throw new IllegalStateException(JGitText.get().renamesAlreadyFound); + MutableObjectId idBuf = new MutableObjectId(); + while (walk.next()) { + DiffEntry entry = new DiffEntry(); + walk.getObjectId(idBuf, 0); + entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf); + walk.getObjectId(idBuf, 1); + entry.newId = AbbreviatedObjectId.fromObjectId(idBuf); + entry.oldMode = walk.getFileMode(0); + entry.newMode = walk.getFileMode(1); + entry.newName = entry.oldName = walk.getPathString(); + if (entry.oldMode == FileMode.MISSING) { + entry.changeType = ChangeType.ADD; + added.add(entry); + } else if (entry.newMode == FileMode.MISSING) { + entry.changeType = ChangeType.DELETE; + deleted.add(entry); + } else { + entry.changeType = ChangeType.MODIFY; + entries.add(entry); + } + } + } + + /** + * Add a DiffEntry to the list of items to run rename detection on. Calling + * this method after calling {@link #getEntries()} will result in an + * {@link IllegalStateException}. + * + * @param entry + * the {@link DiffEntry} to add + * + * @throws IllegalStateException + * the {@link #getEntries()} method has already been called for + * this instance + */ + public void addDiffEntry(DiffEntry entry) { + if (done) + throw new IllegalStateException(JGitText.get().renamesAlreadyFound); + switch (entry.changeType) { + case ADD: + added.add(entry); + break; + case DELETE: + deleted.add(entry); + break; + case COPY: + case MODIFY: + case RENAME: + default: + entries.add(entry); + } + } + + /** + * Determines which files, if any, are renames, and returns an unmodifiable + * list of {@link DiffEntry}s representing all files that have been changed + * in some way. The list will contain all modified files first + * + * @return an unmodifiable list of {@link DiffEntry}s representing all files + * that have been changed + * @throws IOException + */ + public List getEntries() throws IOException { + if (!done) { + done = true; + findExactRenames(); + entries.addAll(added); + entries.addAll(deleted); + added = null; + deleted = null; + Collections.sort(entries, DIFF_COMPARATOR); + } + return Collections.unmodifiableList(entries); + } + + @SuppressWarnings("unchecked") + private void findExactRenames() { + HashMap map = new HashMap(); + + for (DiffEntry del : deleted) { + Object old = map.put(del.oldId, del); + if (old != null) { + if (old instanceof DiffEntry) { + ArrayList tmp = new ArrayList(2); + tmp.add((DiffEntry) old); + tmp.add(del); + map.put(del.oldId, tmp); + } else { + // Must be a list of DiffEntrys + ((List) old).add(del); + map.put(del.oldId, old); + } + } + } + + ArrayList tempAdded = new ArrayList(added.size()); + + for (DiffEntry add : added) { + Object del = map.remove(add.newId); + if (del != null) { + if (del instanceof DiffEntry) { + entries.add(resolveRename(add, (DiffEntry) del, + EXACT_RENAME_SCORE)); + } else { + // Must be a list of DiffEntrys + List tmp = (List) del; + entries.add(resolveRename(add, tmp.remove(0), + EXACT_RENAME_SCORE)); + if (!tmp.isEmpty()) + map.put(add.newId, del); + } + } else { + tempAdded.add(add); + } + } + added = tempAdded; + + Collection values = map.values(); + ArrayList tempDeleted = new ArrayList(values + .size()); + for (Object o : values) { + if (o instanceof DiffEntry) + tempDeleted.add((DiffEntry) o); + else + tempDeleted.addAll((List) o); + } + deleted = tempDeleted; + } + + private DiffEntry resolveRename(DiffEntry add, DiffEntry del, int score) { + DiffEntry renamed = new DiffEntry(); + + renamed.oldId = del.oldId; + renamed.oldMode = del.oldMode; + renamed.oldName = del.oldName; + renamed.newId = add.newId; + renamed.newMode = add.newMode; + renamed.newName = add.newName; + renamed.changeType = ChangeType.RENAME; + renamed.score = score; + + return renamed; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java index a150e8fea..3f188fe0e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AbbreviatedObjectId.java @@ -82,6 +82,21 @@ public static final AbbreviatedObjectId fromString(final byte[] buf, return fromHexString(buf, offset, end); } + /** + * Convert an AbbreviatedObjectId from an {@link AnyObjectId}. + *

+ * This method copies over all bits of the Id, and is therefore complete + * (see {@link #isComplete()}). + * + * @param id + * the {@link ObjectId} to convert from. + * @return the converted object id. + */ + public static final AbbreviatedObjectId fromObjectId(AnyObjectId id) { + return new AbbreviatedObjectId(Constants.OBJECT_ID_STRING_LENGTH, + id.w1, id.w2, id.w3, id.w4, id.w5); + } + /** * Convert an AbbreviatedObjectId from hex characters. * From 978535b09080edcc01302e80b37b9e1263c21db3 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Fri, 2 Jul 2010 17:29:09 -0700 Subject: [PATCH 03/17] Implement similarity based rename detection Content similarity based rename detection is performed only after a linear time detection is performed using exact content match on the ObjectIds. Any names which were paired up during that exact match phase are excluded from the inexact similarity based rename, which reduces the space that must be considered. During rename detection two entries cannot be marked as a rename if they are different types of files. This prevents a symlink from being renamed to a regular file, even if their blob content appears to be similar, or is identical. Efficiently comparing two files is performed by building up two hash indexes and hashing lines or short blocks from each file, counting the number of bytes that each line or block represents. Instead of using a standard java.util.HashMap, we use a custom open hashing scheme similiar to what we use in ObjecIdSubclassMap. This permits us to have a very light-weight hash, with very little memory overhead per cell stored. As we only need two ints per record in the map (line/block key and number of bytes), we collapse them into a single long inside of a long array, making very efficient use of available memory when we create the index table. We only need object headers for the index structure itself, and the index table, but not per-cell. This offers a massive space savings over using java.util.HashMap. The score calculation is done by approximating how many bytes are the same between the two inputs (which for a delta would be how much is copied from the base into the result). The score is derived by dividing the approximate number of bytes in common into the length of the larger of the two input files. Right now the SimilarityIndex table should average about 1/2 full, which means we waste about 50% of our memory on empty entries after we are done indexing a file and sort the table's contents. If memory becomes an issue we could discard the table and copy all records over to a new array that is properly sized. Building the index requires O(M + N log N) time, where M is the size of the input file in bytes, and N is the number of unique lines/blocks in the file. The N log N time constraint comes from the sort of the index table that is necessary to perform linear time matching against another SimilarityIndex created for a different file. To actually perform the rename detection, a SxD matrix is created, placing the sources (aka deletions) along one dimension and the destinations (aka additions) along the other. A simple O(S x D) loop examines every cell in this matrix. A SimilarityIndex is built along the row and reused for each column compare along that row, avoiding the costly index rebuild at the row level. A future improvement would be to load a smaller square matrix into SimilarityIndexes and process everything in that sub-matrix before discarding the column dimension and moving down to the next sub-matrix block along that same grid of rows. An optional ProgressMonitor is permitted to be passed in, allowing applications to see the progress of the detector as it works through the matrix cells. This provides some indication of current status for very long running renames. The default line/block hash function used by the SimilarityIndex may not be optimal, and may produce too many collisions. It is borrowed from RawText's hash, which is used to quickly skip out of a longer equality test if two lines have different hash functions. We may need to refine this hash in the future, in order to minimize the number of collisions we get on common source files. Based on a handful of test commits in JGit (especially my own recent rename repository refactoring series), this rename detector produces output that is very close to C Git. The content similarity scores are sometimes off by 1%, which is most probably caused by our SimilarityIndex type using a different hash function than C Git uses when it computes the delta size between any two objects in the rename matrix. Bug: 318504 Change-Id: I11dff969e8a2e4cf252636d857d2113053bdd9dc Signed-off-by: Shawn O. Pearce --- .../eclipse/jgit/diff/RenameDetectorTest.java | 321 ++++++++----- .../jgit/diff/SimilarityIndexTest.java | 137 ++++++ .../eclipse/jgit/patch/FileHeaderTest.java | 5 +- .../org/eclipse/jgit/patch/PatchCcTest.java | 9 +- .../org/eclipse/jgit/JGitText.properties | 3 + .../src/org/eclipse/jgit/JGitText.java | 3 + .../src/org/eclipse/jgit/diff/DiffEntry.java | 139 +++++- .../org/eclipse/jgit/diff/RenameDetector.java | 420 ++++++++++++------ .../eclipse/jgit/diff/SimilarityIndex.java | 295 ++++++++++++ .../jgit/diff/SimilarityRenameDetector.java | 295 ++++++++++++ .../org/eclipse/jgit/patch/FileHeader.java | 3 - 11 files changed, 1367 insertions(+), 263 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java index 4f84066f2..c4cb600db 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -53,152 +53,237 @@ import org.eclipse.jgit.lib.RepositoryTestCase; public class RenameDetectorTest extends RepositoryTestCase { + private static final String PATH_A = "src/A"; + private static final String PATH_H = "src/H"; + private static final String PATH_Q = "src/Q"; - RenameDetector rd; + private RenameDetector rd; - TestRepository testDb; + private TestRepository testDb; @Override public void setUp() throws Exception { super.setUp(); testDb = new TestRepository(db); - rd = new RenameDetector(); + rd = new RenameDetector(db); } - public void testGetEntriesAddDelete() throws Exception { - ObjectId foo = testDb.blob("foo").copy(); + public void testExactRename_OneRename() throws Exception { + ObjectId foo = blob("foo"); - DiffEntry a = new DiffEntry(); - a.newId = AbbreviatedObjectId.fromObjectId(foo); - a.newMode = FileMode.REGULAR_FILE; - a.newName = "some/file.c"; - a.changeType = ChangeType.ADD; + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); - DiffEntry b = new DiffEntry(); - b.oldId = AbbreviatedObjectId.fromObjectId(foo); - b.oldMode = FileMode.REGULAR_FILE; - b.oldName = "some/other_file.c"; - b.changeType = ChangeType.DELETE; + rd.add(a); + rd.add(b); - rd.addDiffEntry(a); - rd.addDiffEntry(b); - - List entries = rd.getEntries(); + List entries = rd.compute(); assertEquals(1, entries.size()); - - DiffEntry rename = entries.get(0); - assertNotNull(rename); - assertTrue(foo.equals(rename.newId.toObjectId())); - assertTrue(foo.equals(rename.oldId.toObjectId())); - assertEquals(FileMode.REGULAR_FILE, rename.newMode); - assertEquals(FileMode.REGULAR_FILE, rename.oldMode); - assertEquals(ChangeType.RENAME, rename.changeType); - assertEquals("some/file.c", rename.newName); - assertEquals("some/other_file.c", rename.oldName); + assertRename(b, a, 100, entries.get(0)); } - public void testGetEntriesAddDeleteModify() throws Exception { - ObjectId foo = testDb.blob("foo").copy(); - ObjectId bar = testDb.blob("bar").copy(); + public void testExactRename_OneRenameOneModify() throws Exception { + ObjectId foo = blob("foo"); + ObjectId bar = blob("bar"); - DiffEntry a = new DiffEntry(); - a.newId = AbbreviatedObjectId.fromObjectId(foo); - a.newMode = FileMode.REGULAR_FILE; - a.newName = "some/file.c"; - a.changeType = ChangeType.ADD; + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); - DiffEntry b = new DiffEntry(); - b.oldId = AbbreviatedObjectId.fromObjectId(foo); - b.oldMode = FileMode.REGULAR_FILE; - b.oldName = "some/other_file.c"; - b.changeType = ChangeType.DELETE; - - DiffEntry c = new DiffEntry(); + DiffEntry c = DiffEntry.modify(PATH_H); c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar); - c.newMode = c.oldMode = FileMode.REGULAR_FILE; - c.newName = c.oldName = "some/header.h"; - c.changeType = ChangeType.MODIFY; - rd.addDiffEntry(a); - rd.addDiffEntry(b); - rd.addDiffEntry(c); + rd.add(a); + rd.add(b); + rd.add(c); - List entries = rd.getEntries(); + List entries = rd.compute(); assertEquals(2, entries.size()); - - // The renamed change should be first because the output should be - // sorted by newName - DiffEntry rename = entries.get(0); - assertNotNull(rename); - assertTrue(foo.equals(rename.newId.toObjectId())); - assertTrue(foo.equals(rename.oldId.toObjectId())); - assertEquals(FileMode.REGULAR_FILE, rename.newMode); - assertEquals(FileMode.REGULAR_FILE, rename.oldMode); - assertEquals(ChangeType.RENAME, rename.changeType); - assertEquals("some/file.c", rename.newName); - assertEquals("some/other_file.c", rename.oldName); - - DiffEntry modify = entries.get(1); - assertEquals(c, modify); + assertRename(b, a, 100, entries.get(0)); + assertSame(c, entries.get(1)); } - public void testGetEntriesMultipleRenames() throws Exception { - ObjectId foo = testDb.blob("foo").copy(); - ObjectId bar = testDb.blob("bar").copy(); + public void testExactRename_ManyRenames() throws Exception { + ObjectId foo = blob("foo"); + ObjectId bar = blob("bar"); - DiffEntry a = new DiffEntry(); - a.newId = AbbreviatedObjectId.fromObjectId(foo); - a.newMode = FileMode.REGULAR_FILE; - a.newName = "some/file.c"; - a.changeType = ChangeType.ADD; + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); - DiffEntry b = new DiffEntry(); - b.oldId = AbbreviatedObjectId.fromObjectId(foo); - b.oldMode = FileMode.REGULAR_FILE; - b.oldName = "some/other_file.c"; - b.changeType = ChangeType.DELETE; + DiffEntry c = DiffEntry.add("README", bar); + DiffEntry d = DiffEntry.delete("REEDME", bar); - DiffEntry c = new DiffEntry(); - c.newId = AbbreviatedObjectId.fromObjectId(bar); - c.newMode = FileMode.REGULAR_FILE; - c.newName = "README"; - c.changeType = ChangeType.ADD; + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); - DiffEntry d = new DiffEntry(); - d.oldId = AbbreviatedObjectId.fromObjectId(bar); - d.oldMode = FileMode.REGULAR_FILE; - d.oldName = "REEDME"; - d.changeType = ChangeType.DELETE; - - rd.addDiffEntry(a); - rd.addDiffEntry(b); - rd.addDiffEntry(c); - rd.addDiffEntry(d); - - List entries = rd.getEntries(); + List entries = rd.compute(); assertEquals(2, entries.size()); - - // The REEDME -> README renamed change should be first because the - // output should be sorted by newName - DiffEntry readme = entries.get(0); - assertNotNull(readme); - assertTrue(bar.equals(readme.newId.toObjectId())); - assertTrue(bar.equals(readme.oldId.toObjectId())); - assertEquals(FileMode.REGULAR_FILE, readme.newMode); - assertEquals(FileMode.REGULAR_FILE, readme.oldMode); - assertEquals(ChangeType.RENAME, readme.changeType); - assertEquals("README", readme.newName); - assertEquals("REEDME", readme.oldName); - - DiffEntry somefile = entries.get(1); - assertNotNull(somefile); - assertTrue(foo.equals(somefile.newId.toObjectId())); - assertTrue(foo.equals(somefile.oldId.toObjectId())); - assertEquals(FileMode.REGULAR_FILE, somefile.newMode); - assertEquals(FileMode.REGULAR_FILE, somefile.oldMode); - assertEquals(ChangeType.RENAME, somefile.changeType); - assertEquals("some/file.c", somefile.newName); - assertEquals("some/other_file.c", somefile.oldName); + assertRename(d, c, 100, entries.get(0)); + assertRename(b, a, 100, entries.get(1)); } + public void testInexactRename_OnePair() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\n"); + ObjectId bId = blob("foo\nbar\nblah\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 61, entries.get(0)); + } + + public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\n"); + ObjectId bId = blob("foo\nbar\nblah\n"); + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + ObjectId cId = blob("some\nsort\nof\ntext\n"); + ObjectId dId = blob("completely\nunrelated\ntext\n"); + DiffEntry c = DiffEntry.add("c", cId); + DiffEntry d = DiffEntry.delete("d", dId); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + List entries = rd.compute(); + assertEquals(3, entries.size()); + assertSame(c, entries.get(0)); + assertSame(d, entries.get(1)); + assertRename(b, a, 61, entries.get(2)); + } + + public void testInexactRename_LastByteDifferent() throws Exception { + ObjectId aId = blob("foo\nbar\na"); + ObjectId bId = blob("foo\nbar\nb"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 88, entries.get(0)); + } + + public void testInexactRenames_OnePair2() throws Exception { + ObjectId aId = blob("ab\nab\nab\nac\nad\nae\n"); + ObjectId bId = blob("ac\nab\nab\nab\naa\na0\na1\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + rd.setRenameScore(50); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 57, entries.get(0)); + } + + public void testNoRenames_SingleByteFiles() throws Exception { + ObjectId aId = blob("a"); + ObjectId bId = blob("b"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_EmptyFile1() throws Exception { + ObjectId aId = blob(""); + DiffEntry a = DiffEntry.add(PATH_A, aId); + + rd.add(a); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertSame(a, entries.get(0)); + } + + public void testNoRenames_EmptyFile2() throws Exception { + ObjectId aId = blob(""); + ObjectId bId = blob("blah"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_SymlinkAndFile() throws Exception { + ObjectId aId = blob("src/dest"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, aId); + b.oldMode = FileMode.SYMLINK; + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testNoRenames_GitlinkAndFile() throws Exception { + ObjectId aId = blob("src/dest"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, aId); + b.oldMode = FileMode.GITLINK; + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + private ObjectId blob(String content) throws Exception { + return testDb.blob(content).copy(); + } + + private static void assertRename(DiffEntry o, DiffEntry n, int score, + DiffEntry rename) { + assertEquals(ChangeType.RENAME, rename.getChangeType()); + + assertEquals(o.getOldName(), rename.getOldName()); + assertEquals(n.getNewName(), rename.getNewName()); + + assertEquals(o.getOldMode(), rename.getOldMode()); + assertEquals(n.getNewMode(), rename.getNewMode()); + + assertEquals(o.getOldId(), rename.getOldId()); + assertEquals(n.getNewId(), rename.getNewId()); + + assertEquals(score, rename.getScore()); + } } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java new file mode 100644 index 000000000..9ab745fac --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import junit.framework.TestCase; + +import org.eclipse.jgit.lib.Constants; + +public class SimilarityIndexTest extends TestCase { + public void testIndexing() { + SimilarityIndex si = hash("" // + + "A\n" // + + "B\n" // + + "D\n" // + + "B\n" // + ); + + int key_A = keyFor("A\n"); + int key_B = keyFor("B\n"); + int key_D = keyFor("D\n"); + assertTrue(key_A != key_B && key_A != key_D && key_B != key_D); + + assertEquals(3, si.size()); + assertEquals(2, si.count(si.findIndex(key_A))); + assertEquals(4, si.count(si.findIndex(key_B))); + assertEquals(2, si.count(si.findIndex(key_D))); + } + + public void testCommonScore_SameFiles() { + String text = "" // + + "A\n" // + + "B\n" // + + "D\n" // + + "B\n"; + SimilarityIndex src = hash(text); + SimilarityIndex dst = hash(text); + assertEquals(8, src.common(dst)); + assertEquals(8, dst.common(src)); + + assertEquals(100, src.score(dst)); + assertEquals(100, dst.score(src)); + } + + public void testCommonScore_EmptyFiles() { + SimilarityIndex src = hash(""); + SimilarityIndex dst = hash(""); + assertEquals(0, src.common(dst)); + assertEquals(0, dst.common(src)); + } + + public void testCommonScore_TotallyDifferentFiles() { + SimilarityIndex src = hash("A\n"); + SimilarityIndex dst = hash("D\n"); + assertEquals(0, src.common(dst)); + assertEquals(0, dst.common(src)); + } + + public void testCommonScore_SimiliarBy75() { + SimilarityIndex src = hash("A\nB\nC\nD\n"); + SimilarityIndex dst = hash("A\nB\nC\nQ\n"); + assertEquals(6, src.common(dst)); + assertEquals(6, dst.common(src)); + + assertEquals(75, src.score(dst)); + assertEquals(75, dst.score(src)); + } + + private static SimilarityIndex hash(String text) { + SimilarityIndex src = new SimilarityIndex() { + @Override + void hash(byte[] raw, int ptr, final int end) { + while (ptr < end) { + int hash = raw[ptr] & 0xff; + int start = ptr; + do { + int c = raw[ptr++] & 0xff; + if (c == '\n') + break; + } while (ptr < end && ptr - start < 64); + add(hash, ptr - start); + } + } + }; + byte[] raw = Constants.encode(text); + src.setFileSize(raw.length); + src.hash(raw, 0, raw.length); + src.sort(); + return src; + } + + private static int keyFor(String line) { + SimilarityIndex si = hash(line); + assertEquals("single line scored", 1, si.size()); + return si.key(0); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java index 8d9e302a8..17e99779c 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/FileHeaderTest.java @@ -45,6 +45,7 @@ import junit.framework.TestCase; +import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; @@ -153,7 +154,7 @@ public void testParseUnicodeName_NewFile() { assertParse(fh); assertEquals("/dev/null", fh.getOldName()); - assertSame(FileHeader.DEV_NULL, fh.getOldName()); + assertSame(DiffEntry.DEV_NULL, fh.getOldName()); assertEquals("\u00c5ngstr\u00f6m", fh.getNewName()); assertSame(FileHeader.ChangeType.ADD, fh.getChangeType()); @@ -179,7 +180,7 @@ public void testParseUnicodeName_DeleteFile() { assertEquals("\u00c5ngstr\u00f6m", fh.getOldName()); assertEquals("/dev/null", fh.getNewName()); - assertSame(FileHeader.DEV_NULL, fh.getNewName()); + assertSame(DiffEntry.DEV_NULL, fh.getNewName()); assertSame(FileHeader.ChangeType.DELETE, fh.getChangeType()); assertSame(FileHeader.PatchType.UNIFIED, fh.getPatchType()); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java index e97d373d9..1d879cba6 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/patch/PatchCcTest.java @@ -46,10 +46,11 @@ import java.io.IOException; import java.io.InputStream; -import org.eclipse.jgit.lib.FileMode; - import junit.framework.TestCase; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.lib.FileMode; + public class PatchCcTest extends TestCase { public void testParse_OneFileCc() throws IOException { final Patch p = parseTestPatchFile(); @@ -113,7 +114,7 @@ public void testParse_CcNewFile() throws IOException { final CombinedFileHeader cfh = (CombinedFileHeader) p.getFiles().get(0); - assertSame(FileHeader.DEV_NULL, cfh.getOldName()); + assertSame(DiffEntry.DEV_NULL, cfh.getOldName()); assertEquals("d", cfh.getNewName()); assertEquals(187, cfh.startOffset); @@ -168,7 +169,7 @@ public void testParse_CcDeleteFile() throws IOException { final CombinedFileHeader cfh = (CombinedFileHeader) p.getFiles().get(0); assertEquals("a", cfh.getOldName()); - assertSame(FileHeader.DEV_NULL, cfh.getNewName()); + assertSame(DiffEntry.DEV_NULL, cfh.getNewName()); assertEquals(187, cfh.startOffset); diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index 76097cb5f..a8b9a33a4 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -298,6 +298,8 @@ remoteDoesNotSupportSmartHTTPPush=remote does not support smart HTTP push remoteHungUpUnexpectedly=remote hung up unexpectedly remoteNameCantBeNull=Remote name can't be null. renamesAlreadyFound=Renames have already been found. +renamesFindingByContent=Finding renames by content similarity +renamesFindingExact=Finding exact renames repositoryAlreadyExists=Repository already exists: {0} repositoryNotFound=repository not found: {0} requiredHashFunctionNotAvailable=Required hash function {0} not available. @@ -307,6 +309,7 @@ shortCompressedStreamAt=Short compressed stream at {0} shortReadOfBlock=Short read of block. shortReadOfOptionalDIRCExtensionExpectedAnotherBytes=Short read of optional DIRC extension {0}; expected another {1} bytes within the section. shortSkipOfBlock=Short skip of block. +similarityScoreMustBeWithinBounds=Similarity score must be between 0 and 100. smartHTTPPushDisabled=smart HTTP push disabled sourceDestinationMustMatch=Source/Destination must match. sourceIsNotAWildcard=Source is not a wildcard. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index a2e1cd109..ed5192cf5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -357,6 +357,8 @@ public static JGitText get() { /***/ public String remoteHungUpUnexpectedly; /***/ public String remoteNameCantBeNull; /***/ public String renamesAlreadyFound; + /***/ public String renamesFindingByContent; + /***/ public String renamesFindingExact; /***/ public String repositoryAlreadyExists; /***/ public String repositoryNotFound; /***/ public String requiredHashFunctionNotAvailable; @@ -366,6 +368,7 @@ public static JGitText get() { /***/ public String shortReadOfBlock; /***/ public String shortReadOfOptionalDIRCExtensionExpectedAnotherBytes; /***/ public String shortSkipOfBlock; + /***/ public String similarityScoreMustBeWithinBounds; /***/ public String smartHTTPPushDisabled; /***/ public String sourceDestinationMustMatch; /***/ public String sourceIsNotAWildcard; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java index ac3ee9864..e41ec5670 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java @@ -43,11 +43,25 @@ package org.eclipse.jgit.diff; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.treewalk.TreeWalk; /** A value class representing a change to a file */ public class DiffEntry { + private static final AbbreviatedObjectId A_ZERO = AbbreviatedObjectId + .fromObjectId(ObjectId.zeroId()); + + /** Magical file name used for file adds or deletes. */ + public static final String DEV_NULL = "/dev/null"; /** General type of change a single file-level patch describes. */ public static enum ChangeType { @@ -67,6 +81,130 @@ public static enum ChangeType { COPY; } + /** + * Convert the TreeWalk into DiffEntry headers. + * + * @param walk + * the TreeWalk to walk through. Must have exactly two trees. + * @return headers describing the changed files. + * @throws IOException + * the repository cannot be accessed. + */ + public static List scan(TreeWalk walk) throws IOException { + List r = new ArrayList(); + MutableObjectId idBuf = new MutableObjectId(); + while (walk.next()) { + DiffEntry entry = new DiffEntry(); + + walk.getObjectId(idBuf, 0); + entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf); + + walk.getObjectId(idBuf, 1); + entry.newId = AbbreviatedObjectId.fromObjectId(idBuf); + + entry.oldMode = walk.getFileMode(0); + entry.newMode = walk.getFileMode(1); + entry.newName = entry.oldName = walk.getPathString(); + + if (entry.oldMode == FileMode.MISSING) { + entry.oldName = DiffEntry.DEV_NULL; + entry.changeType = ChangeType.ADD; + r.add(entry); + + } else if (entry.newMode == FileMode.MISSING) { + entry.newName = DiffEntry.DEV_NULL; + entry.changeType = ChangeType.DELETE; + r.add(entry); + + } else { + entry.changeType = ChangeType.MODIFY; + if (RenameDetector.sameType(entry.oldMode, entry.newMode)) + r.add(entry); + else + r.addAll(breakModify(entry)); + } + } + return r; + } + + static DiffEntry add(String path, AnyObjectId id) { + DiffEntry e = new DiffEntry(); + e.oldId = A_ZERO; + e.oldMode = FileMode.MISSING; + e.oldName = DEV_NULL; + + e.newId = AbbreviatedObjectId.fromObjectId(id); + e.newMode = FileMode.REGULAR_FILE; + e.newName = path; + e.changeType = ChangeType.ADD; + return e; + } + + static DiffEntry delete(String path, AnyObjectId id) { + DiffEntry e = new DiffEntry(); + e.oldId = AbbreviatedObjectId.fromObjectId(id); + e.oldMode = FileMode.REGULAR_FILE; + e.oldName = path; + + e.newId = A_ZERO; + e.newMode = FileMode.MISSING; + e.newName = DEV_NULL; + e.changeType = ChangeType.DELETE; + return e; + } + + static DiffEntry modify(String path) { + DiffEntry e = new DiffEntry(); + e.oldMode = FileMode.REGULAR_FILE; + e.oldName = path; + + e.newMode = FileMode.REGULAR_FILE; + e.newName = path; + e.changeType = ChangeType.MODIFY; + return e; + } + + static List breakModify(DiffEntry entry) { + DiffEntry del = new DiffEntry(); + del.oldId = entry.getOldId(); + del.oldMode = entry.getOldMode(); + del.oldName = entry.getOldName(); + + del.newId = A_ZERO; + del.newMode = FileMode.MISSING; + del.newName = DiffEntry.DEV_NULL; + del.changeType = ChangeType.DELETE; + + DiffEntry add = new DiffEntry(); + add.oldId = A_ZERO; + add.oldMode = FileMode.MISSING; + add.oldName = DiffEntry.DEV_NULL; + + add.newId = entry.getNewId(); + add.newMode = entry.getNewMode(); + add.newName = entry.getNewName(); + add.changeType = ChangeType.ADD; + return Arrays.asList(del, add); + } + + static DiffEntry pair(ChangeType changeType, DiffEntry src, DiffEntry dst, + int score) { + DiffEntry r = new DiffEntry(); + + r.oldId = src.oldId; + r.oldMode = src.oldMode; + r.oldName = src.oldName; + + r.newId = dst.newId; + r.newMode = dst.newMode; + r.newName = dst.newName; + + r.changeType = changeType; + r.score = score; + + return r; + } + /** File name of the old (pre-image). */ protected String oldName; @@ -170,5 +308,4 @@ public AbbreviatedObjectId getOldId() { public AbbreviatedObjectId getNewId() { return newId; } - } \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 50d81c966..8116e87ef 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -53,22 +53,48 @@ import org.eclipse.jgit.JGitText; import org.eclipse.jgit.diff.DiffEntry.ChangeType; -import org.eclipse.jgit.errors.CorruptObjectException; -import org.eclipse.jgit.errors.IncorrectObjectTypeException; -import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.lib.FileMode; -import org.eclipse.jgit.lib.MutableObjectId; -import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Repository; /** Detect and resolve object renames. */ public class RenameDetector { - private static final int EXACT_RENAME_SCORE = 100; private static final Comparator DIFF_COMPARATOR = new Comparator() { - public int compare(DiffEntry o1, DiffEntry o2) { - return o1.newName.compareTo(o2.newName); + public int compare(DiffEntry a, DiffEntry b) { + int cmp = nameOf(a).compareTo(nameOf(b)); + if (cmp == 0) + cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType()); + return cmp; + } + + private String nameOf(DiffEntry ent) { + // Sort by the new name, unless the change is a delete. On + // deletes the new name is /dev/null, so we sort instead by + // the old name. + // + if (ent.changeType == ChangeType.DELETE) + return ent.oldName; + return ent.newName; + } + + private int sortOf(ChangeType changeType) { + // Sort deletes before adds so that a major type change for + // a file path (such as symlink to regular file) will first + // remove the path, then add it back with the new type. + // + switch (changeType) { + case DELETE: + return 1; + case ADD: + return 2; + default: + return 10; + } } }; @@ -78,181 +104,305 @@ public int compare(DiffEntry o1, DiffEntry o2) { private List added = new ArrayList(); - private boolean done = false; + private boolean done; + + private final Repository repo; + + /** Similarity score required to pair an add/delete as a rename. */ + private int renameScore = 60; + + /** Limit in the number of files to consider for renames. */ + private int renameLimit; + + /** Set if the number of adds or deletes was over the limit. */ + private boolean overRenameLimit; /** - * Walk through a given tree walk with exactly two trees and add all - * differing files to the list of object to run rename detection on. - *

- * The tree walk must have two trees attached to it, as well as a filter. - * Calling this method after calling {@link #getEntries()} will result in an - * {@link IllegalStateException}. + * Create a new rename detector for the given repository * - * @param walk - * the TreeWalk to walk through. Must have exactly two trees. - * @throws IllegalStateException - * the {@link #getEntries()} method has already been called for - * this instance. - * @throws MissingObjectException - * {@link TreeWalk#isRecursive()} was enabled on the tree, a - * subtree was found, but the subtree object does not exist in - * this repository. The repository may be missing objects. - * @throws IncorrectObjectTypeException - * {@link TreeWalk#isRecursive()} was enabled on the tree, a - * subtree was found, and the subtree id does not denote a tree, - * but instead names some other non-tree type of object. The - * repository may have data corruption. - * @throws CorruptObjectException - * the contents of a tree did not appear to be a tree. The - * repository may have data corruption. - * @throws IOException - * a loose object or pack file could not be read. + * @param repo + * the repository to use for rename detection */ - public void addTreeWalk(TreeWalk walk) throws MissingObjectException, - IncorrectObjectTypeException, CorruptObjectException, IOException { + public RenameDetector(Repository repo) { + this.repo = repo; + + Config cfg = repo.getConfig(); + renameLimit = cfg.getInt("diff", "renamelimit", 200); + } + + /** + * @return minimum score required to pair an add/delete as a rename. The + * score ranges are within the bounds of (0, 100). + */ + public int getRenameScore() { + return renameScore; + } + + /** + * Set the minimum score required to pair an add/delete as a rename. + *

+ * When comparing two files together their score must be greater than or + * equal to the rename score for them to be considered a rename match. The + * score is computed based on content similarity, so a score of 60 implies + * that approximately 60% of the bytes in the files are identical. + * + * @param score + * new rename score, must be within (0, 100). + */ + public void setRenameScore(int score) { + if (score < 0 || score > 100) + throw new IllegalArgumentException( + JGitText.get().similarityScoreMustBeWithinBounds); + renameScore = score; + } + + /** @return limit on number of paths to perform inexact rename detection. */ + public int getRenameLimit() { + return renameLimit; + } + + /** + * Set the limit on the number of files to perform inexact rename detection. + *

+ * The rename detector has to build a square matrix of the rename limit on + * each side, then perform that many file compares to determine similarity. + * If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix + * must be allocated, and 1,000,000 file compares may need to be performed. + * + * @param limit + * new file limit. + */ + public void setRenameLimit(int limit) { + renameLimit = limit; + } + + /** + * Check if the detector is over the rename limit. + *

+ * This method can be invoked either before or after {@code getEntries} has + * been used to perform rename detection. + * + * @return true if the detector has more file additions or removals than the + * rename limit is currently set to. In such configurations the + * detector will skip expensive computation. + */ + public boolean isOverRenameLimit() { + if (done) + return overRenameLimit; + int cnt = Math.max(added.size(), deleted.size()); + return getRenameLimit() != 0 && getRenameLimit() < cnt; + } + + /** + * Add entries to be considered for rename detection. + * + * @param entriesToAdd + * one or more entries to add. + * @throws IllegalStateException + * if {@code getEntries} was already invoked. + */ + public void addAll(Collection entriesToAdd) { if (done) throw new IllegalStateException(JGitText.get().renamesAlreadyFound); - MutableObjectId idBuf = new MutableObjectId(); - while (walk.next()) { - DiffEntry entry = new DiffEntry(); - walk.getObjectId(idBuf, 0); - entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf); - walk.getObjectId(idBuf, 1); - entry.newId = AbbreviatedObjectId.fromObjectId(idBuf); - entry.oldMode = walk.getFileMode(0); - entry.newMode = walk.getFileMode(1); - entry.newName = entry.oldName = walk.getPathString(); - if (entry.oldMode == FileMode.MISSING) { - entry.changeType = ChangeType.ADD; + + for (DiffEntry entry : entriesToAdd) { + switch (entry.getChangeType()) { + case ADD: added.add(entry); - } else if (entry.newMode == FileMode.MISSING) { - entry.changeType = ChangeType.DELETE; + break; + + case DELETE: deleted.add(entry); - } else { - entry.changeType = ChangeType.MODIFY; - entries.add(entry); + break; + + case MODIFY: + if (sameType(entry.getOldMode(), entry.getNewMode())) + entries.add(entry); + else + entries.addAll(DiffEntry.breakModify(entry)); + break; + + case COPY: + case RENAME: + default: + entriesToAdd.add(entry); } } } /** - * Add a DiffEntry to the list of items to run rename detection on. Calling - * this method after calling {@link #getEntries()} will result in an - * {@link IllegalStateException}. + * Add an entry to be considered for rename detection. * * @param entry - * the {@link DiffEntry} to add - * + * to add. * @throws IllegalStateException - * the {@link #getEntries()} method has already been called for - * this instance + * if {@code getEntries} was already invoked. */ - public void addDiffEntry(DiffEntry entry) { - if (done) - throw new IllegalStateException(JGitText.get().renamesAlreadyFound); - switch (entry.changeType) { - case ADD: - added.add(entry); - break; - case DELETE: - deleted.add(entry); - break; - case COPY: - case MODIFY: - case RENAME: - default: - entries.add(entry); - } + public void add(DiffEntry entry) { + addAll(Collections.singletonList(entry)); } /** - * Determines which files, if any, are renames, and returns an unmodifiable - * list of {@link DiffEntry}s representing all files that have been changed - * in some way. The list will contain all modified files first + * Detect renames in the current file set. + *

+ * This convenience function runs without a progress monitor. * * @return an unmodifiable list of {@link DiffEntry}s representing all files - * that have been changed + * that have been changed. * @throws IOException + * file contents cannot be read from the repository. */ - public List getEntries() throws IOException { + public List compute() throws IOException { + return compute(NullProgressMonitor.INSTANCE); + } + + /** + * Detect renames in the current file set. + * + * @param pm + * report progress during the detection phases. + * @return an unmodifiable list of {@link DiffEntry}s representing all files + * that have been changed. + * @throws IOException + * file contents cannot be read from the repository. + */ + public List compute(ProgressMonitor pm) throws IOException { if (!done) { done = true; - findExactRenames(); + + if (pm == null) + pm = NullProgressMonitor.INSTANCE; + findExactRenames(pm); + findContentRenames(pm); + entries.addAll(added); - entries.addAll(deleted); added = null; + + entries.addAll(deleted); deleted = null; + Collections.sort(entries, DIFF_COMPARATOR); } return Collections.unmodifiableList(entries); } - @SuppressWarnings("unchecked") - private void findExactRenames() { - HashMap map = new HashMap(); + private void findContentRenames(ProgressMonitor pm) throws IOException { + int cnt = Math.max(added.size(), deleted.size()); + if (cnt == 0) + return; + if (getRenameLimit() == 0 || cnt <= getRenameLimit()) { + SimilarityRenameDetector d; + + d = new SimilarityRenameDetector(repo, deleted, added); + d.setRenameScore(getRenameScore()); + d.compute(pm); + deleted = d.getLeftOverSources(); + added = d.getLeftOverDestinations(); + entries.addAll(d.getMatches()); + } else { + overRenameLimit = true; + } + } + + @SuppressWarnings("unchecked") + private void findExactRenames(ProgressMonitor pm) { + if (added.isEmpty() || deleted.isEmpty()) + return; + + pm.beginTask(JGitText.get().renamesFindingExact, // + added.size() + deleted.size()); + + HashMap map = new HashMap(); for (DiffEntry del : deleted) { Object old = map.put(del.oldId, del); - if (old != null) { - if (old instanceof DiffEntry) { - ArrayList tmp = new ArrayList(2); - tmp.add((DiffEntry) old); - tmp.add(del); - map.put(del.oldId, tmp); - } else { - // Must be a list of DiffEntrys - ((List) old).add(del); - map.put(del.oldId, old); - } + if (old instanceof DiffEntry) { + ArrayList list = new ArrayList(2); + list.add((DiffEntry) old); + list.add(del); + map.put(del.oldId, list); + + } else if (old != null) { + // Must be a list of DiffEntries + ((List) old).add(del); + map.put(del.oldId, old); } + pm.update(1); } - ArrayList tempAdded = new ArrayList(added.size()); - - for (DiffEntry add : added) { - Object del = map.remove(add.newId); - if (del != null) { - if (del instanceof DiffEntry) { - entries.add(resolveRename(add, (DiffEntry) del, - EXACT_RENAME_SCORE)); + ArrayList left = new ArrayList(added.size()); + for (DiffEntry dst : added) { + Object del = map.get(dst.newId); + if (del instanceof DiffEntry) { + DiffEntry e = (DiffEntry) del; + if (sameType(e.oldMode, dst.newMode)) { + if (e.changeType == ChangeType.DELETE) { + e.changeType = ChangeType.RENAME; + entries.add(exactRename(e, dst)); + } else { + entries.add(exactCopy(e, dst)); + } } else { - // Must be a list of DiffEntrys - List tmp = (List) del; - entries.add(resolveRename(add, tmp.remove(0), - EXACT_RENAME_SCORE)); - if (!tmp.isEmpty()) - map.put(add.newId, del); + left.add(dst); } + + } else if (del != null) { + List list = (List) del; + DiffEntry best = null; + for (DiffEntry e : list) { + if (best == null && sameType(e.oldMode, dst.newMode)) + best = e; + } + if (best != null) { + if (best.changeType == ChangeType.DELETE) { + best.changeType = ChangeType.RENAME; + entries.add(exactRename(best, dst)); + } else { + entries.add(exactCopy(best, dst)); + } + } else { + left.add(dst); + } + } else { - tempAdded.add(add); + left.add(dst); + } + pm.update(1); + } + added = left; + + deleted = new ArrayList(map.size()); + for (Object o : map.values()) { + if (o instanceof DiffEntry) { + DiffEntry e = (DiffEntry) o; + if (e.changeType == ChangeType.DELETE) + deleted.add(e); + } else { + List list = (List) o; + for (DiffEntry e : list) { + if (e.changeType == ChangeType.DELETE) + deleted.add(e); + } } } - added = tempAdded; - - Collection values = map.values(); - ArrayList tempDeleted = new ArrayList(values - .size()); - for (Object o : values) { - if (o instanceof DiffEntry) - tempDeleted.add((DiffEntry) o); - else - tempDeleted.addAll((List) o); - } - deleted = tempDeleted; + pm.endTask(); } - private DiffEntry resolveRename(DiffEntry add, DiffEntry del, int score) { - DiffEntry renamed = new DiffEntry(); + static boolean sameType(FileMode a, FileMode b) { + // Files have to be of the same type in order to rename them. + // We would never want to rename a file to a gitlink, or a + // symlink to a file. + // + int aType = a.getBits() & FileMode.TYPE_MASK; + int bType = b.getBits() & FileMode.TYPE_MASK; + return aType == bType; + } - renamed.oldId = del.oldId; - renamed.oldMode = del.oldMode; - renamed.oldName = del.oldName; - renamed.newId = add.newId; - renamed.newMode = add.newMode; - renamed.newName = add.newName; - renamed.changeType = ChangeType.RENAME; - renamed.score = score; + private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) { + return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE); + } - return renamed; + private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) { + return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE); } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java new file mode 100644 index 000000000..d6026b5be --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.util.Arrays; + +import org.eclipse.jgit.lib.ObjectLoader; + +/** + * Index structure of lines/blocks in one file. + *

+ * This structure can be used to compute an approximation of the similarity + * between two files. The index is used by {@link SimilarityRenameDetector} to + * compute scores between files. + *

+ * To save space in memory, this index uses a space efficient encoding which + * will not exceed 1 MiB per instance. The index starts out at a smaller size + * (closer to 2 KiB), but may grow as more distinct blocks within the scanned + * file are discovered. + */ +class SimilarityIndex { + /** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */ + private static final int MAX_HASH_BITS = 17; + + /** The {@link #idHash} table will not grow bigger than this, ever. */ + private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS; + + /** Prime just before {@link #MAX_HASH_SIZE}. */ + private static final int P = 131071; + + /** + * Shift to apply before storing a key. + *

+ * Within the 64 bit table record space, we leave the highest bit unset so + * all values are positive, and we need {@link #MAX_HASH_BITS} bits for the + * keys. The lower 32 bits are used to count bytes impacted. + */ + private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS; + + /** Total size of the file we hashed into the structure. */ + private long fileSize; + + /** Number of non-zero entries in {@link #idHash}. */ + private int idSize; + + /** + * Pairings of content keys and counters. + *

+ * Slots in the table are actually two ints wedged into a single long. The + * upper {@link #MAX_HASH_BITS} bits stores the content key, and the + * remaining lower bits stores the number of bytes associated with that key. + * Empty slots are denoted by 0, which cannot occur because the count cannot + * be 0. Values can only be positive, which we enforce during key addition. + */ + private long[] idHash; + + SimilarityIndex() { + idHash = new long[256]; + } + + long getFileSize() { + return fileSize; + } + + void setFileSize(long size) { + fileSize = size; + } + + void hash(ObjectLoader obj) { + byte[] raw = obj.getCachedBytes(); + setFileSize(raw.length); + hash(raw, 0, raw.length); + } + + void hash(byte[] raw, int ptr, final int end) { + while (ptr < end) { + int hash = 5381; + int start = ptr; + + // Hash one line, or one block, whichever occurs first. + do { + int c = raw[ptr++] & 0xff; + if (c == '\n') + break; + hash = (hash << 5) ^ c; + } while (ptr < end && ptr - start < 64); + add(hash, ptr - start); + } + } + + /** + * Sort the internal table so it can be used for efficient scoring. + *

+ * Once sorted, additional lines/blocks cannot be added to the index. + */ + void sort() { + // Sort the array. All of the empty space will wind up at the front, + // because we forced all of the keys to always be positive. Later + // we only work with the back half of the array. + // + Arrays.sort(idHash); + } + + int score(SimilarityIndex dst) { + long max = Math.max(fileSize, dst.fileSize); + return (int) ((common(dst) * 100L) / max); + } + + int common(SimilarityIndex dst) { + return common(this, dst); + } + + private static int common(SimilarityIndex src, SimilarityIndex dst) { + int srcIdx = src.packedIndex(0); + int dstIdx = dst.packedIndex(0); + long[] srcHash = src.idHash; + long[] dstHash = dst.idHash; + return common(srcHash, srcIdx, dstHash, dstIdx); + } + + private static int common(long[] srcHash, int srcIdx, // + long[] dstHash, int dstIdx) { + if (srcIdx == srcHash.length || dstIdx == dstHash.length) + return 0; + + int common = 0; + int srcKey = keyOf(srcHash[srcIdx]); + int dstKey = keyOf(dstHash[dstIdx]); + + for (;;) { + if (srcKey == dstKey) { + common += countOf(dstHash[dstIdx]); + + if (++srcIdx == srcHash.length) + break; + srcKey = keyOf(srcHash[srcIdx]); + + if (++dstIdx == dstHash.length) + break; + dstKey = keyOf(dstHash[dstIdx]); + + } else if (srcKey < dstKey) { + // Regions of src which do not appear in dst. + if (++srcIdx == srcHash.length) + break; + srcKey = keyOf(srcHash[srcIdx]); + + } else /* if (srcKey > dstKey) */{ + // Regions of dst which do not appear in dst. + if (++dstIdx == dstHash.length) + break; + dstKey = keyOf(dstHash[dstIdx]); + } + } + + return common; + } + + // Testing only + int size() { + return idSize; + } + + // Testing only + int key(int idx) { + return keyOf(idHash[packedIndex(idx)]); + } + + // Testing only + long count(int idx) { + return countOf(idHash[packedIndex(idx)]); + } + + // Brute force approach only for testing. + int findIndex(int key) { + for (int i = 0; i < idSize; i++) + if (key(i) == key) + return i; + return -1; + } + + private int packedIndex(int idx) { + return (idHash.length - idSize) + idx; + } + + void add(int key, int cnt) { + key = hash(key); + int j = slot(key); + for (;;) { + long v = idHash[j]; + if (v == 0) { + // Empty slot in the table, store here. + if (shouldGrow()) { + grow(); + j = slot(key); + continue; + } + idHash[j] = (((long) key) << KEY_SHIFT) | cnt; + idSize++; + return; + + } else if (keyOf(v) == key) { + // Same key, increment the counter. + idHash[j] = v + cnt; + return; + + } else if (++j >= idHash.length) { + j = 0; + } + } + } + + private static int hash(int key) { + // Make the key fit into our table. Since we have a maximum size + // that we cap the table at, all keys get squashed before going + // into the table. This prevents overflow. + // + return (key >>> 1) % P; + } + + private int slot(int key) { + return key % idHash.length; + } + + private boolean shouldGrow() { + int n = idHash.length; + return n < MAX_HASH_SIZE && n <= idSize * 2; + } + + private void grow() { + long[] oldHash = idHash; + int oldSize = idHash.length; + + idHash = new long[2 * oldSize]; + for (int i = 0; i < oldSize; i++) { + long v = oldHash[i]; + if (v != 0) { + int j = slot(keyOf(v)); + while (idHash[j] != 0) + if (++j >= idHash.length) + j = 0; + idHash[j] = v; + } + } + } + + private static int keyOf(long v) { + return (int) (v >>> KEY_SHIFT); + } + + private static int countOf(long v) { + return (int) v; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java new file mode 100644 index 000000000..c92b1e3bb --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Repository; + +class SimilarityRenameDetector { + /** + * Number of bits we need to express an index into src or dst list. + *

+ * This must be 28, giving us a limit of 2^28 entries in either list, which + * is an insane limit of 536,870,912 file names being considered in a single + * rename pass. The other 8 bits are used to store the score, while staying + * under 127 so the long doesn't go negative. + */ + private static final int BITS_PER_INDEX = 28; + + private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1; + + private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX; + + private final Repository repo; + + /** + * All sources to consider for copies or renames. + *

+ * A source is typically a {@link ChangeType#DELETE} change, but could be + * another type when trying to perform copy detection concurrently with + * rename detection. + */ + private List srcs; + + /** + * All destinations to consider looking for a rename. + *

+ * A destination is typically an {@link ChangeType#ADD}, as the name has + * just come into existence, and we want to discover where its initial + * content came from. + */ + private List dsts; + + /** + * Matrix of all examined file pairs, and their scores. + *

+ * The upper 8 bits of each long stores the score, but the score is bounded + * to be in the range (0, 128] so that the highest bit is never set, and all + * entries are therefore positive. + *

+ * List indexes to an element of {@link #srcs} and {@link #dsts} are encoded + * as the lower two groups of 28 bits, respectively, but the encoding is + * inverted, so that 0 is expressed as {@code (1 << 28) - 1}. This sorts + * lower list indices later in the matrix, giving precedence to files whose + * names sort earlier in the tree. + */ + private long[] matrix; + + /** Score a pair must exceed to be considered a rename. */ + private int renameScore = 60; + + private List out; + + SimilarityRenameDetector(Repository repo, List srcs, + List dsts) { + this.repo = repo; + this.srcs = srcs; + this.dsts = dsts; + } + + void setRenameScore(int score) { + renameScore = score; + } + + void compute(ProgressMonitor pm) throws IOException { + if (pm == null) + pm = NullProgressMonitor.INSTANCE; + + pm.beginTask(JGitText.get().renamesFindingByContent, // + 2 * srcs.size() * dsts.size()); + + int mNext = buildMatrix(pm); + out = new ArrayList(Math.min(mNext, dsts.size())); + + // Match rename pairs on a first come, first serve basis until + // we have looked at everything that is above our minimum score. + // + for (--mNext; mNext >= 0; mNext--) { + long ent = matrix[mNext]; + int sIdx = srcFile(ent); + int dIdx = dstFile(ent); + DiffEntry s = srcs.get(sIdx); + DiffEntry d = dsts.get(dIdx); + + if (d == null) { + pm.update(1); + continue; // was already matched earlier + } + + ChangeType type; + if (s.changeType == ChangeType.DELETE) { + // First use of this source file. Tag it as a rename so we + // later know it is already been used as a rename, other + // matches (if any) will claim themselves as copies instead. + // + s.changeType = ChangeType.RENAME; + type = ChangeType.RENAME; + } else { + type = ChangeType.COPY; + } + + out.add(DiffEntry.pair(type, s, d, score(ent))); + dsts.set(dIdx, null); // Claim the destination was matched. + pm.update(1); + } + + srcs = compactSrcList(srcs); + dsts = compactDstList(dsts); + pm.endTask(); + } + + List getMatches() { + return out; + } + + List getLeftOverSources() { + return srcs; + } + + List getLeftOverDestinations() { + return dsts; + } + + private static List compactSrcList(List in) { + ArrayList r = new ArrayList(in.size()); + for (DiffEntry e : in) { + if (e.changeType == ChangeType.DELETE) + r.add(e); + } + return r; + } + + private static List compactDstList(List in) { + ArrayList r = new ArrayList(in.size()); + for (DiffEntry e : in) { + if (e != null) + r.add(e); + } + return r; + } + + private int buildMatrix(ProgressMonitor pm) throws IOException { + // Allocate for the worst-case scenario where every pair has a + // score that we need to consider. We might not need that many. + // + matrix = new long[srcs.size() * dsts.size()]; + + // Consider each pair of files, if the score is above the minimum + // threshold we need record that scoring in the matrix so we can + // later find the best matches. + // + int mNext = 0; + for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) { + DiffEntry srcEnt = srcs.get(srcIdx); + if (!isFile(srcEnt.oldMode)) { + pm.update(dsts.size()); + continue; + } + + SimilarityIndex s = hash(srcEnt.oldId.toObjectId()); + for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) { + DiffEntry dstEnt = dsts.get(dstIdx); + + if (!isFile(dstEnt.newMode)) { + pm.update(1); + continue; + } + + if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) { + pm.update(1); + continue; + } + + SimilarityIndex d = hash(dstEnt.newId.toObjectId()); + int score = s.score(d); + + if (score < renameScore) { + pm.update(1); + continue; + } + + matrix[mNext++] = encode(score, srcIdx, dstIdx); + pm.update(1); + } + } + + // Sort everything in the range we populated, which might be the + // entire matrix, or just a smaller slice if we had some bad low + // scoring pairs. + // + Arrays.sort(matrix, 0, mNext); + return mNext; + } + + private SimilarityIndex hash(ObjectId objectId) throws IOException { + SimilarityIndex r = new SimilarityIndex(); + r.hash(repo.openObject(objectId)); + r.sort(); + return r; + } + + private static int score(long value) { + return (int) (value >>> SCORE_SHIFT); + } + + private static int srcFile(long value) { + return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK); + } + + private static int dstFile(long value) { + return decodeFile(((int) value) & INDEX_MASK); + } + + private static long encode(int score, int srcIdx, int dstIdx) { + return (((long) score) << SCORE_SHIFT) // + | (encodeFile(srcIdx) << BITS_PER_INDEX) // + | encodeFile(dstIdx); + } + + private static long encodeFile(int idx) { + // We invert the index so that the first file in the list sorts + // later in the table. This permits us to break ties favoring + // earlier names over later ones. + // + return INDEX_MASK - idx; + } + + private static int decodeFile(int v) { + return INDEX_MASK - v; + } + + private static boolean isFile(FileMode mode) { + return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java index 35c2ee303..f2f2f2e74 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java @@ -71,9 +71,6 @@ /** Patch header describing an action for a single file path. */ public class FileHeader extends DiffEntry { - /** Magical file name used for file adds or deletes. */ - public static final String DEV_NULL = "/dev/null"; - private static final byte[] OLD_MODE = encodeASCII("old mode "); private static final byte[] NEW_MODE = encodeASCII("new mode "); From 04a9d23b9ad1a49b81d6ffb0e5cf7637ba2c6d27 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jul 2010 15:49:07 -0700 Subject: [PATCH 04/17] log, diff: Add rename detection support Implement rename detection in the command line diff and log commands. Also support --name-status, -p and -U flags, as these can be quite useful to view more detail. All of the Git patch file formatting code is now moved over to the DiffFormatter class. This permits us to reuse it in any context, including inside of IDEs. Change-Id: I687ccba34e18105a07e0a439d2181c323209d96c Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/pgm/CLIText.properties | 4 + .../src/org/eclipse/jgit/pgm/Diff.java | 148 ++++++++-------- .../src/org/eclipse/jgit/pgm/Log.java | 56 +++++- .../eclipse/jgit/pgm/RevWalkTextBuiltin.java | 2 +- .../org/eclipse/jgit/diff/DiffFormatter.java | 160 ++++++++++++++++++ 5 files changed, 300 insertions(+), 70 deletions(-) diff --git a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties index d13c47d29..67357be1d 100644 --- a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties +++ b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties @@ -66,6 +66,7 @@ metaVar_directory=DIRECTORY metaVar_file=FILE metaVar_gitDir=GIT_DIR metaVar_hostName=HOSTNAME +metaVar_linesOfContext=lines metaVar_message=message metaVar_name=name metaVar_object=object @@ -139,6 +140,7 @@ usage_cloneRepositoryIntoNewDir=Clone a repository into a new directory usage_configureTheServiceInDaemonServicename=configure the service in daemon.servicename usage_deleteBranchEvenIfNotMerged=delete branch (even if not merged) usage_deleteFullyMergedBranch=delete fully merged branch +usage_detectRenames=detect renamed files usage_directoriesToExport=directories to export usage_disableTheServiceInAllRepositories=disable the service in all repositories usage_displayAListOfAllRegisteredJgitCommands=Display a list of all registered jgit commands @@ -160,6 +162,7 @@ usage_listBothRemoteTrackingAndLocalBranches=list both remote-tracking and local usage_listCreateOrDeleteBranches=List, create, or delete branches usage_logAllPretty=format:%H %ct %P' output=log --all '--pretty=format:%H %ct %P' output usage_moveRenameABranch=move/rename a branch +usage_nameStatus=show only name and status of files usage_outputFile=Output file usage_path=path usage_performFsckStyleChecksOnReceive=perform fsck style checks on receive @@ -170,6 +173,7 @@ usage_recurseIntoSubtrees=recurse into subtrees usage_recordChangesToRepository=Record changes to the repository usage_setTheGitRepositoryToOperateOn=set the git repository to operate on usage_showRefNamesMatchingCommits=Show ref names matching commits +usage_showPatch=display patch usage_symbolicVersionForTheProject=Symbolic version for the project usage_synchronizeIPZillaData=Synchronize IPZilla data usage_tagMessage=tag message diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java index fc1e400ab..a3fca638b 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -45,28 +45,28 @@ package org.eclipse.jgit.pgm; +import java.io.BufferedOutputStream; import java.io.IOException; -import java.io.PrintStream; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; -import org.kohsuke.args4j.Argument; -import org.kohsuke.args4j.Option; - +import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffFormatter; -import org.eclipse.jgit.diff.MyersDiff; import org.eclipse.jgit.diff.RawText; import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange; -import org.eclipse.jgit.lib.FileMode; -import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.lib.TextProgressMonitor; import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.filter.AndTreeFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.kohsuke.args4j.Argument; +import org.kohsuke.args4j.Option; @Command(common = true, usage = "usage_ShowDiffs") class Diff extends TextBuiltin { @@ -78,9 +78,15 @@ void tree_0(final AbstractTreeIterator c) { @Argument(index = 1, metaVar = "metaVar_treeish", required = true) private final List trees = new ArrayList(); - @Option(name = "--", metaVar = "metaVar_port", multiValued = true, handler = PathTreeFilterHandler.class) + @Option(name = "--", metaVar = "metaVar_paths", multiValued = true, handler = PathTreeFilterHandler.class) private TreeFilter pathFilter = TreeFilter.ALL; + @Option(name = "-M", usage = "usage_detectRenames") + private boolean detectRenames; + + @Option(name = "--name-status", usage = "usage_nameStatus") + private boolean showNameAndStatusOnly; + @Option(name = "--ignore-space-at-eol") private boolean ignoreWsTrailing; @@ -93,10 +99,69 @@ void tree_0(final AbstractTreeIterator c) { @Option(name = "-w", aliases = { "--ignore-all-space" }) private boolean ignoreWsAll; - private DiffFormatter fmt = new DiffFormatter(); + @Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext") + void unified(int lines) { + fmt.setContext(lines); + } + + private DiffFormatter fmt = new DiffFormatter() { + @Override + protected RawText newRawText(byte[] raw) { + if (ignoreWsAll) + return new RawTextIgnoreAllWhitespace(raw); + else if (ignoreWsTrailing) + return new RawTextIgnoreTrailingWhitespace(raw); + else if (ignoreWsChange) + return new RawTextIgnoreWhitespaceChange(raw); + else if (ignoreWsLeading) + return new RawTextIgnoreLeadingWhitespace(raw); + else + return new RawText(raw); + } + }; @Override protected void run() throws Exception { + List files = scan(); + + if (showNameAndStatusOnly) { + nameStatus(out, files); + out.flush(); + + } else { + BufferedOutputStream o = new BufferedOutputStream(System.out); + fmt.format(o, db, files); + o.flush(); + } + } + + static void nameStatus(PrintWriter out, List files) { + for (DiffEntry ent : files) { + switch (ent.getChangeType()) { + case ADD: + out.println("A\t" + ent.getNewName()); + break; + case DELETE: + out.println("D\t" + ent.getOldName()); + break; + case MODIFY: + out.println("M\t" + ent.getNewName()); + break; + case COPY: + out.format("C%1$03d\t%2$s\t%3$s", ent.getScore(), // + ent.getOldName(), ent.getNewName()); + out.println(); + break; + case RENAME: + out.format("R%1$03d\t%2$s\t%3$s", ent.getScore(), // + ent.getOldName(), ent.getNewName()); + out.println(); + break; + } + } + } + + private List scan() throws IOException { final TreeWalk walk = new TreeWalk(db); walk.reset(); walk.setRecursive(true); @@ -104,65 +169,12 @@ protected void run() throws Exception { walk.addTree(i); walk.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter)); - while (walk.next()) - outputDiff(System.out, walk.getPathString(), - walk.getObjectId(0), walk.getFileMode(0), - walk.getObjectId(1), walk.getFileMode(1)); - } - - protected void outputDiff(PrintStream out, String path, - ObjectId id1, FileMode mode1, ObjectId id2, FileMode mode2) throws IOException { - String name1 = "a/" + path; - String name2 = "b/" + path; - out.println("diff --git " + name1 + " " + name2); - boolean isNew=false; - boolean isDelete=false; - if (id1.equals(ObjectId.zeroId())) { - out.println("new file mode " + mode2); - isNew=true; - } else if (id2.equals(ObjectId.zeroId())) { - out.println("deleted file mode " + mode1); - isDelete=true; - } else if (!mode1.equals(mode2)) { - out.println("old mode " + mode1); - out.println("new mode " + mode2); + List files = DiffEntry.scan(walk); + if (detectRenames) { + RenameDetector rd = new RenameDetector(db); + rd.addAll(files); + files = rd.compute(new TextProgressMonitor()); } - out.println("index " + id1.abbreviate(db, 7).name() - + ".." + id2.abbreviate(db, 7).name() - + (mode1.equals(mode2) ? " " + mode1 : "")); - out.println("--- " + (isNew ? "/dev/null" : name1)); - out.println("+++ " + (isDelete ? "/dev/null" : name2)); - - byte[] aRaw = getRawBytes(id1); - byte[] bRaw = getRawBytes(id2); - - if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { - out.println("Binary files differ"); - return; - } - - RawText a = getRawText(aRaw); - RawText b = getRawText(bRaw); - MyersDiff diff = new MyersDiff(a, b); - fmt.formatEdits(out, a, b, diff.getEdits()); - } - - private byte[] getRawBytes(ObjectId id) throws IOException { - if (id.equals(ObjectId.zeroId())) - return new byte[] {}; - return db.openBlob(id).getCachedBytes(); - } - - private RawText getRawText(byte[] raw) { - if (ignoreWsAll) - return new RawTextIgnoreAllWhitespace(raw); - else if (ignoreWsTrailing) - return new RawTextIgnoreTrailingWhitespace(raw); - else if (ignoreWsChange) - return new RawTextIgnoreWhitespaceChange(raw); - else if (ignoreWsLeading) - return new RawTextIgnoreLeadingWhitespace(raw); - else - return new RawText(raw); + return files; } } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java index 9aa197e4a..511da4498 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java @@ -45,22 +45,32 @@ package org.eclipse.jgit.pgm; +import java.io.BufferedOutputStream; +import java.io.IOException; import java.text.DateFormat; import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.util.Collection; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TimeZone; -import org.kohsuke.args4j.Option; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.diff.DiffFormatter; +import org.eclipse.jgit.diff.RenameDetector; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.TextProgressMonitor; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.treewalk.filter.AndTreeFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.kohsuke.args4j.Option; @Command(common = true, usage = "usage_viewCommitHistory") class Log extends RevWalkTextBuiltin { @@ -73,6 +83,22 @@ class Log extends RevWalkTextBuiltin { @Option(name="--decorate", usage="usage_showRefNamesMatchingCommits") private boolean decorate; + @Option(name = "-M", usage = "usage_detectRenames") + private boolean detectRenames; + + @Option(name = "--name-status", usage = "usage_nameStatus") + private boolean showNameAndStatusOnly; + + @Option(name = "-p", usage = "usage_showPatch") + private boolean showPatch; + + @Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext") + void unified(int lines) { + diffFmt.setContext(lines); + } + + private DiffFormatter diffFmt = new DiffFormatter(); + Log() { fmt = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy ZZZZZ", Locale.US); } @@ -120,6 +146,34 @@ protected void show(final RevCommit c) throws Exception { } out.println(); + if (c.getParentCount() > 0 && (showNameAndStatusOnly || showPatch)) + showDiff(c); out.flush(); } + + private void showDiff(RevCommit c) throws IOException { + final TreeWalk tw = new TreeWalk(db); + tw.reset(); + tw.setRecursive(true); + tw.addTree(c.getParent(0).getTree()); + tw.addTree(c.getTree()); + tw.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter)); + + List files = DiffEntry.scan(tw); + if (detectRenames) { + RenameDetector rd = new RenameDetector(db); + rd.addAll(files); + files = rd.compute(new TextProgressMonitor()); + } + + if (showNameAndStatusOnly) { + Diff.nameStatus(out, files); + + } else { + out.flush(); + BufferedOutputStream o = new BufferedOutputStream(System.out); + diffFmt.format(o, db, files); + o.flush(); + } + } } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java index ea6eeb102..beb961d99 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java @@ -114,7 +114,7 @@ void enableBoundary(final boolean on) { private final List commits = new ArrayList(); @Option(name = "--", metaVar = "metaVar_path", multiValued = true, handler = PathTreeFilterHandler.class) - private TreeFilter pathFilter = TreeFilter.ALL; + protected TreeFilter pathFilter = TreeFilter.ALL; private final List revLimiter = new ArrayList(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index c40d3b700..b08d1cb50 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -44,6 +44,7 @@ package org.eclipse.jgit.diff; +import static org.eclipse.jgit.lib.Constants.encode; import static org.eclipse.jgit.lib.Constants.encodeASCII; import java.io.IOException; @@ -51,7 +52,13 @@ import java.util.List; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.patch.FileHeader; +import org.eclipse.jgit.util.QuotedString; /** * Format an {@link EditList} as a Git style unified patch script. @@ -80,6 +87,159 @@ public void setContext(final int lineCount) { context = lineCount; } + /** + * Format a patch script from a list of difference entries. + * + * @param out + * stream to write the patch script out to. + * @param src + * repository the file contents can be read from. + * @param entries + * entries describing the affected files. + * @throws IOException + * a file's content cannot be read, or the output stream cannot + * be written to. + */ + public void format(final OutputStream out, Repository src, + List entries) throws IOException { + for(DiffEntry ent : entries) { + if (ent instanceof FileHeader) { + format( + out, + (FileHeader) ent, // + newRawText(open(src, ent.getOldMode(), ent.getOldId())), + newRawText(open(src, ent.getNewMode(), ent.getNewId()))); + } else { + format(out, src, ent); + } + } + } + + private void format(OutputStream out, Repository src, DiffEntry ent) + throws IOException { + String oldName = quotePath("a/" + ent.getOldName()); + String newName = quotePath("b/" + ent.getNewName()); + out.write(encode("diff --git " + oldName + " " + newName + "\n")); + + switch(ent.getChangeType()) { + case ADD: + out.write(encodeASCII("new file mode ")); + ent.getNewMode().copyTo(out); + out.write('\n'); + break; + + case DELETE: + out.write(encodeASCII("deleted file mode ")); + ent.getOldMode().copyTo(out); + out.write('\n'); + break; + + case RENAME: + out.write(encode("similarity index " + ent.getScore() + "%")); + out.write('\n'); + + out.write(encode("rename from " + quotePath(ent.getOldName()))); + out.write('\n'); + + out.write(encode("rename to " + quotePath(ent.getNewName()))); + out.write('\n'); + break; + + case COPY: + out.write(encode("similarity index " + ent.getScore() + "%")); + out.write('\n'); + + out.write(encode("copy from " + quotePath(ent.getOldName()))); + out.write('\n'); + + out.write(encode("copy to " + quotePath(ent.getNewName()))); + out.write('\n'); + + if (!ent.getOldMode().equals(ent.getNewMode())) { + out.write(encodeASCII("new file mode ")); + ent.getNewMode().copyTo(out); + out.write('\n'); + } + break; + } + + switch (ent.getChangeType()) { + case RENAME: + case MODIFY: + if (!ent.getOldMode().equals(ent.getNewMode())) { + out.write(encodeASCII("old mode ")); + ent.getOldMode().copyTo(out); + out.write('\n'); + + out.write(encodeASCII("new mode ")); + ent.getNewMode().copyTo(out); + out.write('\n'); + } + } + + out.write(encodeASCII("index " // + + format(src, ent.getOldId()) // + + ".." // + + format(src, ent.getNewId()))); + if (ent.getOldMode().equals(ent.getNewMode())) { + out.write(' '); + ent.getNewMode().copyTo(out); + } + out.write('\n'); + out.write(encode("--- " + oldName + '\n')); + out.write(encode("+++ " + newName + '\n')); + + byte[] aRaw = open(src, ent.getOldMode(), ent.getOldId()); + byte[] bRaw = open(src, ent.getNewMode(), ent.getNewId()); + + if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + out.write(encodeASCII("Binary files differ\n")); + + } else { + RawText a = newRawText(aRaw); + RawText b = newRawText(bRaw); + formatEdits(out, a, b, new MyersDiff(a, b).getEdits()); + } + } + + /** + * Construct a RawText sequence for use with {@link MyersDiff}. + * + * @param content + * text to be compared. + * @return the raw text instance to handle the content. + */ + protected RawText newRawText(byte[] content) { + return new RawText(content); + } + + private String format(Repository db, AbbreviatedObjectId oldId) { + if (oldId.isComplete()) + oldId = oldId.toObjectId().abbreviate(db, 8); + return oldId.name(); + } + + private static String quotePath(String name) { + String q = QuotedString.GIT_PATH.quote(name); + return ('"' + name + '"').equals(q) ? name : q; + } + + private byte[] open(Repository src, FileMode mode, AbbreviatedObjectId id) + throws IOException { + if (mode == FileMode.MISSING) + return new byte[] {}; + + if (mode.getObjectType() != Constants.OBJ_BLOB) + return new byte[] {}; + + if (id.isComplete()) { + ObjectLoader ldr = src.openObject(id.toObjectId()); + return ldr.getCachedBytes(); + } + + return new byte[] {}; + } + /** * Format a patch script, reusing a previously parsed FileHeader. *

From 5be90be996551302d59a07ea49af5e5b9fed1cb3 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jul 2010 16:58:37 -0700 Subject: [PATCH 05/17] Redo DiffFormatter API to be easier to use Passing around the OutputStream and the Repository is crazy. Instead put the stream in the constructor, since this formatter exists only to output to the stream, and put the repository as a member variable that can be optionally set. Change-Id: I2bad012fee7f40dc1346700ebd19f1e048982878 Signed-off-by: Shawn O. Pearce --- .../src/org/eclipse/jgit/pgm/Diff.java | 9 +- .../src/org/eclipse/jgit/pgm/Log.java | 10 +- .../jgit/diff/DiffFormatterReflowTest.java | 4 +- .../org/eclipse/jgit/JGitText.properties | 1 + .../src/org/eclipse/jgit/JGitText.java | 1 + .../org/eclipse/jgit/diff/DiffFormatter.java | 215 +++++++++++------- 6 files changed, 143 insertions(+), 97 deletions(-) diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java index a3fca638b..32499618d 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -104,7 +104,8 @@ void unified(int lines) { fmt.setContext(lines); } - private DiffFormatter fmt = new DiffFormatter() { + private DiffFormatter fmt = new DiffFormatter( // + new BufferedOutputStream(System.out)) { @Override protected RawText newRawText(byte[] raw) { if (ignoreWsAll) @@ -129,9 +130,9 @@ protected void run() throws Exception { out.flush(); } else { - BufferedOutputStream o = new BufferedOutputStream(System.out); - fmt.format(o, db, files); - o.flush(); + fmt.setRepository(db); + fmt.format(files); + fmt.flush(); } } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java index 511da4498..aa4e8ae3c 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java @@ -97,7 +97,8 @@ void unified(int lines) { diffFmt.setContext(lines); } - private DiffFormatter diffFmt = new DiffFormatter(); + private DiffFormatter diffFmt = new DiffFormatter( // + new BufferedOutputStream(System.out)); Log() { fmt = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy ZZZZZ", Locale.US); @@ -170,10 +171,9 @@ private void showDiff(RevCommit c) throws IOException { Diff.nameStatus(out, files); } else { - out.flush(); - BufferedOutputStream o = new BufferedOutputStream(System.out); - diffFmt.format(o, db, files); - o.flush(); + diffFmt.setRepository(db); + diffFmt.format(files); + diffFmt.flush(); } } } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java index d9e50d20a..d9c95d765 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterReflowTest.java @@ -68,7 +68,7 @@ public class DiffFormatterReflowTest extends TestCase { protected void setUp() throws Exception { super.setUp(); out = new ByteArrayOutputStream(); - fmt = new DiffFormatter(); + fmt = new DiffFormatter(out); } public void testNegativeContextFails() throws IOException { @@ -143,7 +143,7 @@ private void assertFormatted() throws IOException { } private void assertFormatted(final String name) throws IOException { - fmt.format(out, file, a, b); + fmt.format(file, a, b); final String exp = RawParseUtils.decode(readFile(name)); assertEquals(exp, RawParseUtils.decode(out.toByteArray())); } diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index a8b9a33a4..afe7a8c05 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -301,6 +301,7 @@ renamesAlreadyFound=Renames have already been found. renamesFindingByContent=Finding renames by content similarity renamesFindingExact=Finding exact renames repositoryAlreadyExists=Repository already exists: {0} +repositoryIsRequired=Repository is required. repositoryNotFound=repository not found: {0} requiredHashFunctionNotAvailable=Required hash function {0} not available. resolvingDeltas=Resolving deltas diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index ed5192cf5..1f21b4a02 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -360,6 +360,7 @@ public static JGitText get() { /***/ public String renamesFindingByContent; /***/ public String renamesFindingExact; /***/ public String repositoryAlreadyExists; + /***/ public String repositoryIsRequired; /***/ public String repositoryNotFound; /***/ public String requiredHashFunctionNotAvailable; /***/ public String resolvingDeltas; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index b08d1cb50..00cfe8b08 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -66,13 +66,40 @@ public class DiffFormatter { private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n"); + private final OutputStream out; + + private Repository db; + private int context; - /** Create a new formatter with a default level of context. */ - public DiffFormatter() { + /** + * Create a new formatter with a default level of context. + * + * @param out + * the stream the formatter will write line data to. This stream + * should have buffering arranged by the caller, as many small + * writes are performed to it. + */ + public DiffFormatter(OutputStream out) { + this.out = out; setContext(3); } + /** @return the stream we are outputting data to. */ + protected OutputStream getOutputStream() { + return out; + } + + /** + * Set the repository the formatter can load object contents from. + * + * @param repository + * source repository holding referenced objects. + */ + public void setRepository(Repository repository) { + db = repository; + } + /** * Change the number of lines of context to display. * @@ -83,45 +110,61 @@ public DiffFormatter() { */ public void setContext(final int lineCount) { if (lineCount < 0) - throw new IllegalArgumentException(JGitText.get().contextMustBeNonNegative); + throw new IllegalArgumentException( + JGitText.get().contextMustBeNonNegative); context = lineCount; } + /** + * Flush the underlying output stream of this formatter. + * + * @throws IOException + * the stream's own flush method threw an exception. + */ + public void flush() throws IOException { + out.flush(); + } + /** * Format a patch script from a list of difference entries. * - * @param out - * stream to write the patch script out to. - * @param src - * repository the file contents can be read from. * @param entries * entries describing the affected files. * @throws IOException * a file's content cannot be read, or the output stream cannot * be written to. */ - public void format(final OutputStream out, Repository src, - List entries) throws IOException { - for(DiffEntry ent : entries) { - if (ent instanceof FileHeader) { - format( - out, - (FileHeader) ent, // - newRawText(open(src, ent.getOldMode(), ent.getOldId())), - newRawText(open(src, ent.getNewMode(), ent.getNewId()))); - } else { - format(out, src, ent); - } + public void format(List entries) throws IOException { + for (DiffEntry ent : entries) + format(ent); + } + + /** + * Format a patch script for one file entry. + * + * @param entry + * the entry to be formatted. + * @throws IOException + * a file's content cannot be read, or the output stream cannot + * be written to. + */ + public void format(DiffEntry entry) throws IOException { + if (entry instanceof FileHeader) { + format( + (FileHeader) entry, // + newRawText(open(entry.getOldMode(), entry.getOldId())), + newRawText(open(entry.getNewMode(), entry.getNewId()))); + } else { + formatAndDiff(entry); } } - private void format(OutputStream out, Repository src, DiffEntry ent) - throws IOException { + private void formatAndDiff(DiffEntry ent) throws IOException { String oldName = quotePath("a/" + ent.getOldName()); String newName = quotePath("b/" + ent.getNewName()); out.write(encode("diff --git " + oldName + " " + newName + "\n")); - switch(ent.getChangeType()) { + switch (ent.getChangeType()) { case ADD: out.write(encodeASCII("new file mode ")); ent.getNewMode().copyTo(out); @@ -135,7 +178,7 @@ private void format(OutputStream out, Repository src, DiffEntry ent) break; case RENAME: - out.write(encode("similarity index " + ent.getScore() + "%")); + out.write(encodeASCII("similarity index " + ent.getScore() + "%")); out.write('\n'); out.write(encode("rename from " + quotePath(ent.getOldName()))); @@ -146,7 +189,7 @@ private void format(OutputStream out, Repository src, DiffEntry ent) break; case COPY: - out.write(encode("similarity index " + ent.getScore() + "%")); + out.write(encodeASCII("similarity index " + ent.getScore() + "%")); out.write('\n'); out.write(encode("copy from " + quotePath(ent.getOldName()))); @@ -178,9 +221,9 @@ private void format(OutputStream out, Repository src, DiffEntry ent) } out.write(encodeASCII("index " // - + format(src, ent.getOldId()) // + + format(ent.getOldId()) // + ".." // - + format(src, ent.getNewId()))); + + format(ent.getNewId()))); if (ent.getOldMode().equals(ent.getNewMode())) { out.write(' '); ent.getNewMode().copyTo(out); @@ -189,8 +232,8 @@ private void format(OutputStream out, Repository src, DiffEntry ent) out.write(encode("--- " + oldName + '\n')); out.write(encode("+++ " + newName + '\n')); - byte[] aRaw = open(src, ent.getOldMode(), ent.getOldId()); - byte[] bRaw = open(src, ent.getNewMode(), ent.getNewId()); + byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); + byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { out.write(encodeASCII("Binary files differ\n")); @@ -198,7 +241,7 @@ private void format(OutputStream out, Repository src, DiffEntry ent) } else { RawText a = newRawText(aRaw); RawText b = newRawText(bRaw); - formatEdits(out, a, b, new MyersDiff(a, b).getEdits()); + formatEdits(a, b, new MyersDiff(a, b).getEdits()); } } @@ -213,8 +256,8 @@ protected RawText newRawText(byte[] content) { return new RawText(content); } - private String format(Repository db, AbbreviatedObjectId oldId) { - if (oldId.isComplete()) + private String format(AbbreviatedObjectId oldId) { + if (oldId.isComplete() && db != null) oldId = oldId.toObjectId().abbreviate(db, 8); return oldId.name(); } @@ -224,7 +267,7 @@ private static String quotePath(String name) { return ('"' + name + '"').equals(q) ? name : q; } - private byte[] open(Repository src, FileMode mode, AbbreviatedObjectId id) + private byte[] open(FileMode mode, AbbreviatedObjectId id) throws IOException { if (mode == FileMode.MISSING) return new byte[] {}; @@ -232,8 +275,10 @@ private byte[] open(Repository src, FileMode mode, AbbreviatedObjectId id) if (mode.getObjectType() != Constants.OBJ_BLOB) return new byte[] {}; + if (db == null) + throw new IllegalStateException(JGitText.get().repositoryIsRequired); if (id.isComplete()) { - ObjectLoader ldr = src.openObject(id.toObjectId()); + ObjectLoader ldr = db.openObject(id.toObjectId()); return ldr.getCachedBytes(); } @@ -247,8 +292,6 @@ private byte[] open(Repository src, FileMode mode, AbbreviatedObjectId id) * to increase or reduce the number of lines of context within the script. * All header lines are reused as-is from the supplied FileHeader. * - * @param out - * stream to write the patch script out to. * @param head * existing file header containing the header lines to copy. * @param a @@ -260,8 +303,8 @@ private byte[] open(Repository src, FileMode mode, AbbreviatedObjectId id) * @throws IOException * writing to the supplied stream failed. */ - public void format(final OutputStream out, final FileHeader head, - final RawText a, final RawText b) throws IOException { + public void format(final FileHeader head, final RawText a, final RawText b) + throws IOException { // Reuse the existing FileHeader as-is by blindly copying its // header lines, but avoiding its hunks. Instead we recreate // the hunks from the text instances we have been supplied. @@ -272,19 +315,22 @@ public void format(final OutputStream out, final FileHeader head, end = head.getHunks().get(0).getStartOffset(); out.write(head.getBuffer(), start, end - start); - formatEdits(out, a, b, head.toEditList()); + formatEdits(a, b, head.toEditList()); } /** * Formats a list of edits in unified diff format - * @param out where the unified diff is written to - * @param a the text A which was compared - * @param b the text B which was compared - * @param edits some differences which have been calculated between A and B + * + * @param a + * the text A which was compared + * @param b + * the text B which was compared + * @param edits + * some differences which have been calculated between A and B * @throws IOException */ - public void formatEdits(final OutputStream out, final RawText a, - final RawText b, final EditList edits) throws IOException { + public void formatEdits(final RawText a, final RawText b, + final EditList edits) throws IOException { for (int curIdx = 0; curIdx < edits.size();) { Edit curEdit = edits.get(curIdx); final int endIdx = findCombinedEnd(edits, curIdx); @@ -295,18 +341,24 @@ public void formatEdits(final OutputStream out, final RawText a, final int aEnd = Math.min(a.size(), endEdit.getEndA() + context); final int bEnd = Math.min(b.size(), endEdit.getEndB() + context); - writeHunkHeader(out, aCur, aEnd, bCur, bEnd); + writeHunkHeader(aCur, aEnd, bCur, bEnd); while (aCur < aEnd || bCur < bEnd) { if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) { - writeContextLine(out, a, aCur, isEndOfLineMissing(a, aCur)); + writeContextLine(a, aCur); + if (isEndOfLineMissing(a, aCur)) + out.write(noNewLine); aCur++; bCur++; } else if (aCur < curEdit.getEndA()) { - writeRemovedLine(out, a, aCur, isEndOfLineMissing(a, aCur)); + writeRemovedLine(a, aCur); + if (isEndOfLineMissing(a, aCur)) + out.write(noNewLine); aCur++; } else if (bCur < curEdit.getEndB()) { - writeAddedLine(out, b, bCur, isEndOfLineMissing(b, bCur)); + writeAddedLine(b, bCur); + if (isEndOfLineMissing(b, bCur)) + out.write(noNewLine); bCur++; } @@ -317,21 +369,17 @@ public void formatEdits(final OutputStream out, final RawText a, } /** - * Output a line of diff context + * Output a line of context (unmodified line). * - * @param out - * OutputStream * @param text * RawText for accessing raw data * @param line * the line number within text - * @param endOfLineMissing - * true if we should add the GNU end of line missing warning * @throws IOException */ - protected void writeContextLine(final OutputStream out, final RawText text, - final int line, boolean endOfLineMissing) throws IOException { - writeLine(out, ' ', text, line, endOfLineMissing); + protected void writeContextLine(final RawText text, final int line) + throws IOException { + writeLine(' ', text, line); } private boolean isEndOfLineMissing(final RawText text, final int line) { @@ -339,46 +387,36 @@ private boolean isEndOfLineMissing(final RawText text, final int line) { } /** - * Output an added line + * Output an added line. * - * @param out - * OutputStream * @param text * RawText for accessing raw data * @param line * the line number within text - * @param endOfLineMissing - * true if we should add the gnu end of line missing warning * @throws IOException */ - protected void writeAddedLine(final OutputStream out, final RawText text, final int line, boolean endOfLineMissing) + protected void writeAddedLine(final RawText text, final int line) throws IOException { - writeLine(out, '+', text, line, endOfLineMissing); + writeLine('+', text, line); } /** * Output a removed line * - * @param out - * OutputStream * @param text * RawText for accessing raw data * @param line * the line number within text - * @param endOfLineMissing - * true if we should add the gnu end of line missing warning * @throws IOException */ - protected void writeRemovedLine(final OutputStream out, final RawText text, - final int line, boolean endOfLineMissing) throws IOException { - writeLine(out, '-', text, line, endOfLineMissing); + protected void writeRemovedLine(final RawText text, final int line) + throws IOException { + writeLine('-', text, line); } /** * Output a hunk header * - * @param out - * OutputStream * @param aStartLine * within first source * @param aEndLine @@ -389,20 +427,20 @@ protected void writeRemovedLine(final OutputStream out, final RawText text, * within second source * @throws IOException */ - protected void writeHunkHeader(final OutputStream out, int aStartLine, int aEndLine, + protected void writeHunkHeader(int aStartLine, int aEndLine, int bStartLine, int bEndLine) throws IOException { out.write('@'); out.write('@'); - writeRange(out, '-', aStartLine + 1, aEndLine - aStartLine); - writeRange(out, '+', bStartLine + 1, bEndLine - bStartLine); + writeRange('-', aStartLine + 1, aEndLine - aStartLine); + writeRange('+', bStartLine + 1, bEndLine - bStartLine); out.write(' '); out.write('@'); out.write('@'); out.write('\n'); } - private static void writeRange(final OutputStream out, final char prefix, - final int begin, final int cnt) throws IOException { + private void writeRange(final char prefix, final int begin, final int cnt) + throws IOException { out.write(' '); out.write(prefix); switch (cnt) { @@ -431,18 +469,23 @@ private static void writeRange(final OutputStream out, final char prefix, } } - private static void writeLine(final OutputStream out, final char prefix, - final RawText text, final int cur, boolean noNewLineIndicator) throws IOException { + /** + * Write a standard patch script line. + * + * @param prefix + * prefix before the line, typically '-', '+', ' '. + * @param text + * the text object to obtain the line from. + * @param cur + * line number to output. + * @throws IOException + * the stream threw an exception while writing to it. + */ + protected void writeLine(final char prefix, final RawText text, + final int cur) throws IOException { out.write(prefix); text.writeLine(out, cur); out.write('\n'); - if (noNewLineIndicator) - writeNoNewLine(out); - } - - private static void writeNoNewLine(final OutputStream out) - throws IOException { - out.write(noNewLine); } private int findCombinedEnd(final List edits, final int i) { From bd8740dc1440e5b2aca1464404182105b2dc1853 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jul 2010 16:59:06 -0700 Subject: [PATCH 06/17] Format submodule links during differences Instead of crashing, output a submodule link with the simple "Subproject commit $fullid\n" syntax used by C Git. Change-Id: Iae8646941683fb19b73fb038217d2e3bf5f77fa9 Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/diff/DiffFormatter.java | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index 00cfe8b08..e114b9220 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -46,6 +46,7 @@ import static org.eclipse.jgit.lib.Constants.encode; import static org.eclipse.jgit.lib.Constants.encodeASCII; +import static org.eclipse.jgit.lib.FileMode.GITLINK; import java.io.IOException; import java.io.OutputStream; @@ -232,16 +233,27 @@ private void formatAndDiff(DiffEntry ent) throws IOException { out.write(encode("--- " + oldName + '\n')); out.write(encode("+++ " + newName + '\n')); - byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); - byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); - - if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { - out.write(encodeASCII("Binary files differ\n")); - + if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { + if (ent.getOldMode() == GITLINK) { + out.write(encodeASCII("-Subproject commit " + + ent.getOldId().name() + "\n")); + } + if (ent.getNewMode() == GITLINK) { + out.write(encodeASCII("+Subproject commit " + + ent.getNewId().name() + "\n")); + } } else { - RawText a = newRawText(aRaw); - RawText b = newRawText(bRaw); - formatEdits(a, b, new MyersDiff(a, b).getEdits()); + byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); + byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); + + if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + out.write(encodeASCII("Binary files differ\n")); + + } else { + RawText a = newRawText(aRaw); + RawText b = newRawText(bRaw); + formatEdits(a, b, new MyersDiff(a, b).getEdits()); + } } } From 8a0c58394db1bdea64737ed683d6c32639e567d9 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jul 2010 17:32:47 -0700 Subject: [PATCH 07/17] log: Add whitespace ignore options Similar to what we did with diff, implement whitespace ignore options for log too. This requires us to define some means of creating any RawText object type at will inside of DiffFormatter, so we define a new factory interface to construct RawText instances on demand. Unfortunately we have to copy the entire block of common options. args4j only processes the options/arguments on the one command class and Java doesn't support multiple inheritance. Change-Id: Ia16cd3a11b850fffae9fbe7b721d7e43f1d0e8a5 Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/pgm/CLIText.properties | 2 + .../src/org/eclipse/jgit/pgm/Diff.java | 65 +++++++++++------- .../src/org/eclipse/jgit/pgm/Log.java | 54 +++++++++++++-- .../org/eclipse/jgit/JGitText.properties | 1 + .../src/org/eclipse/jgit/JGitText.java | 1 + .../org/eclipse/jgit/diff/DiffFormatter.java | 67 +++++++++++-------- .../src/org/eclipse/jgit/diff/RawText.java | 19 ++++++ .../jgit/diff/RawTextIgnoreAllWhitespace.java | 7 ++ .../diff/RawTextIgnoreLeadingWhitespace.java | 7 ++ .../diff/RawTextIgnoreTrailingWhitespace.java | 7 ++ .../diff/RawTextIgnoreWhitespaceChange.java | 9 ++- 11 files changed, 181 insertions(+), 58 deletions(-) diff --git a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties index 67357be1d..e879d6b60 100644 --- a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties +++ b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/CLIText.properties @@ -68,6 +68,7 @@ metaVar_gitDir=GIT_DIR metaVar_hostName=HOSTNAME metaVar_linesOfContext=lines metaVar_message=message +metaVar_n=n metaVar_name=name metaVar_object=object metaVar_op=OP @@ -171,6 +172,7 @@ usage_produceAnEclipseIPLog=Produce an Eclipse IP log usage_pruneStaleTrackingRefs=prune stale tracking refs usage_recurseIntoSubtrees=recurse into subtrees usage_recordChangesToRepository=Record changes to the repository +usage_renameLimit=limit size of rename matrix usage_setTheGitRepositoryToOperateOn=set the git repository to operate on usage_showRefNamesMatchingCommits=Show ref names matching commits usage_showPatch=display patch diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java index 32499618d..77ed73048 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -53,12 +53,12 @@ import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffFormatter; -import org.eclipse.jgit.diff.RawText; import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange; import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.TextProgressMonitor; import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; import org.eclipse.jgit.treewalk.AbstractTreeIterator; @@ -70,6 +70,9 @@ @Command(common = true, usage = "usage_ShowDiffs") class Diff extends TextBuiltin { + private final DiffFormatter diffFmt = new DiffFormatter( // + new BufferedOutputStream(System.out)); + @Argument(index = 0, metaVar = "metaVar_treeish", required = true) void tree_0(final AbstractTreeIterator c) { trees.add(c); @@ -81,45 +84,55 @@ void tree_0(final AbstractTreeIterator c) { @Option(name = "--", metaVar = "metaVar_paths", multiValued = true, handler = PathTreeFilterHandler.class) private TreeFilter pathFilter = TreeFilter.ALL; + // BEGIN -- Options shared with Log + @Option(name = "-p", usage = "usage_showPatch") + boolean showPatch; + @Option(name = "-M", usage = "usage_detectRenames") private boolean detectRenames; + @Option(name = "-l", usage = "usage_renameLimit") + private Integer renameLimit; + @Option(name = "--name-status", usage = "usage_nameStatus") private boolean showNameAndStatusOnly; @Option(name = "--ignore-space-at-eol") - private boolean ignoreWsTrailing; + void ignoreSpaceAtEol(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreTrailingWhitespace.FACTORY); + } @Option(name = "--ignore-leading-space") - private boolean ignoreWsLeading; + void ignoreLeadingSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreLeadingWhitespace.FACTORY); + } @Option(name = "-b", aliases = { "--ignore-space-change" }) - private boolean ignoreWsChange; + void ignoreSpaceChange(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreWhitespaceChange.FACTORY); + } @Option(name = "-w", aliases = { "--ignore-all-space" }) - private boolean ignoreWsAll; + void ignoreAllSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreAllWhitespace.FACTORY); + } @Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext") void unified(int lines) { - fmt.setContext(lines); + diffFmt.setContext(lines); } - private DiffFormatter fmt = new DiffFormatter( // - new BufferedOutputStream(System.out)) { - @Override - protected RawText newRawText(byte[] raw) { - if (ignoreWsAll) - return new RawTextIgnoreAllWhitespace(raw); - else if (ignoreWsTrailing) - return new RawTextIgnoreTrailingWhitespace(raw); - else if (ignoreWsChange) - return new RawTextIgnoreWhitespaceChange(raw); - else if (ignoreWsLeading) - return new RawTextIgnoreLeadingWhitespace(raw); - else - return new RawText(raw); - } - }; + @Option(name = "--abbrev", metaVar = "n") + void abbrev(int lines) { + diffFmt.setAbbreviationLength(lines); + } + + @Option(name = "--full-index") + void abbrev(@SuppressWarnings("unused") boolean on) { + diffFmt.setAbbreviationLength(Constants.OBJECT_ID_STRING_LENGTH); + } + + // END -- Options shared with Log @Override protected void run() throws Exception { @@ -130,9 +143,9 @@ protected void run() throws Exception { out.flush(); } else { - fmt.setRepository(db); - fmt.format(files); - fmt.flush(); + diffFmt.setRepository(db); + diffFmt.format(files); + diffFmt.flush(); } } @@ -173,6 +186,8 @@ private List scan() throws IOException { List files = DiffEntry.scan(walk); if (detectRenames) { RenameDetector rd = new RenameDetector(db); + if (renameLimit != null) + rd.setRenameLimit(renameLimit.intValue()); rd.addAll(files); files = rd.compute(new TextProgressMonitor()); } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java index aa4e8ae3c..83ef6eb87 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java @@ -60,8 +60,13 @@ import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffFormatter; +import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace; +import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace; +import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace; +import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange; import org.eclipse.jgit.diff.RenameDetector; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.TextProgressMonitor; @@ -78,27 +83,63 @@ class Log extends RevWalkTextBuiltin { private final DateFormat fmt; + private final DiffFormatter diffFmt = new DiffFormatter( // + new BufferedOutputStream(System.out)); + private Map> allRefsByPeeledObjectId; @Option(name="--decorate", usage="usage_showRefNamesMatchingCommits") private boolean decorate; + // BEGIN -- Options shared with Diff + @Option(name = "-p", usage = "usage_showPatch") + boolean showPatch; + @Option(name = "-M", usage = "usage_detectRenames") private boolean detectRenames; + @Option(name = "-l", usage = "usage_renameLimit") + private Integer renameLimit; + @Option(name = "--name-status", usage = "usage_nameStatus") private boolean showNameAndStatusOnly; - @Option(name = "-p", usage = "usage_showPatch") - private boolean showPatch; + @Option(name = "--ignore-space-at-eol") + void ignoreSpaceAtEol(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreTrailingWhitespace.FACTORY); + } + + @Option(name = "--ignore-leading-space") + void ignoreLeadingSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreLeadingWhitespace.FACTORY); + } + + @Option(name = "-b", aliases = { "--ignore-space-change" }) + void ignoreSpaceChange(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreWhitespaceChange.FACTORY); + } + + @Option(name = "-w", aliases = { "--ignore-all-space" }) + void ignoreAllSpace(@SuppressWarnings("unused") boolean on) { + diffFmt.setRawTextFactory(RawTextIgnoreAllWhitespace.FACTORY); + } @Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext") void unified(int lines) { diffFmt.setContext(lines); } - private DiffFormatter diffFmt = new DiffFormatter( // - new BufferedOutputStream(System.out)); + @Option(name = "--abbrev", metaVar = "metaVar_n") + void abbrev(int lines) { + diffFmt.setAbbreviationLength(lines); + } + + @Option(name = "--full-index") + void abbrev(@SuppressWarnings("unused") boolean on) { + diffFmt.setAbbreviationLength(Constants.OBJECT_ID_STRING_LENGTH); + } + + // END -- Options shared with Diff Log() { fmt = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy ZZZZZ", Locale.US); @@ -147,7 +188,7 @@ protected void show(final RevCommit c) throws Exception { } out.println(); - if (c.getParentCount() > 0 && (showNameAndStatusOnly || showPatch)) + if (c.getParentCount() == 1 && (showNameAndStatusOnly || showPatch)) showDiff(c); out.flush(); } @@ -163,6 +204,8 @@ private void showDiff(RevCommit c) throws IOException { List files = DiffEntry.scan(tw); if (detectRenames) { RenameDetector rd = new RenameDetector(db); + if (renameLimit != null) + rd.setRenameLimit(renameLimit.intValue()); rd.addAll(files); files = rd.compute(new TextProgressMonitor()); } @@ -175,5 +218,6 @@ private void showDiff(RevCommit c) throws IOException { diffFmt.format(files); diffFmt.flush(); } + out.println(); } } diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index afe7a8c05..50fadfa81 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -6,6 +6,7 @@ JRELacksMD5Implementation=JRE lacks MD5 implementation URINotSupported=URI not supported: {0} URLNotFound={0} not found aNewObjectIdIsRequired=A NewObjectId is required. +abbreviationLengthMustBeNonNegative=Abbreviation length must not be negative. advertisementCameBefore=advertisement of {0}^{} came before {1} advertisementOfCameBefore=advertisement of {0}^{} came before {1} amazonS3ActionFailed={0} of '{1}' failed: {2} {3} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index 1f21b4a02..f46461be6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -66,6 +66,7 @@ public static JGitText get() { /***/ public String URINotSupported; /***/ public String URLNotFound; /***/ public String aNewObjectIdIsRequired; + /***/ public String abbreviationLengthMustBeNonNegative; /***/ public String advertisementCameBefore; /***/ public String advertisementOfCameBefore; /***/ public String amazonS3ActionFailed; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index e114b9220..c12c54b05 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -73,6 +73,10 @@ public class DiffFormatter { private int context; + private int abbreviationLength; + + private RawText.Factory rawTextFactory = RawText.FACTORY; + /** * Create a new formatter with a default level of context. * @@ -84,6 +88,7 @@ public class DiffFormatter { public DiffFormatter(OutputStream out) { this.out = out; setContext(3); + setAbbreviationLength(8); } /** @return the stream we are outputting data to. */ @@ -116,6 +121,36 @@ public void setContext(final int lineCount) { context = lineCount; } + /** + * Change the number of digits to show in an ObjectId. + * + * @param count + * number of digits to show in an ObjectId. + */ + public void setAbbreviationLength(final int count) { + if (count < 0) + throw new IllegalArgumentException( + JGitText.get().abbreviationLengthMustBeNonNegative); + abbreviationLength = count; + } + + /** + * Set the helper that constructs difference output. + * + * @param type + * the factory to create different output. Different types of + * factories can produce different whitespace behavior, for + * example. + * @see RawText#FACTORY + * @see RawTextIgnoreAllWhitespace#FACTORY + * @see RawTextIgnoreLeadingWhitespace#FACTORY + * @see RawTextIgnoreTrailingWhitespace#FACTORY + * @see RawTextIgnoreWhitespaceChange#FACTORY + */ + public void setRawTextFactory(RawText.Factory type) { + rawTextFactory = type; + } + /** * Flush the underlying output stream of this formatter. * @@ -143,24 +178,13 @@ public void format(List entries) throws IOException { /** * Format a patch script for one file entry. * - * @param entry + * @param ent * the entry to be formatted. * @throws IOException * a file's content cannot be read, or the output stream cannot * be written to. */ - public void format(DiffEntry entry) throws IOException { - if (entry instanceof FileHeader) { - format( - (FileHeader) entry, // - newRawText(open(entry.getOldMode(), entry.getOldId())), - newRawText(open(entry.getNewMode(), entry.getNewId()))); - } else { - formatAndDiff(entry); - } - } - - private void formatAndDiff(DiffEntry ent) throws IOException { + public void format(DiffEntry ent) throws IOException { String oldName = quotePath("a/" + ent.getOldName()); String newName = quotePath("b/" + ent.getNewName()); out.write(encode("diff --git " + oldName + " " + newName + "\n")); @@ -250,27 +274,16 @@ private void formatAndDiff(DiffEntry ent) throws IOException { out.write(encodeASCII("Binary files differ\n")); } else { - RawText a = newRawText(aRaw); - RawText b = newRawText(bRaw); + RawText a = rawTextFactory.create(aRaw); + RawText b = rawTextFactory.create(bRaw); formatEdits(a, b, new MyersDiff(a, b).getEdits()); } } } - /** - * Construct a RawText sequence for use with {@link MyersDiff}. - * - * @param content - * text to be compared. - * @return the raw text instance to handle the content. - */ - protected RawText newRawText(byte[] content) { - return new RawText(content); - } - private String format(AbbreviatedObjectId oldId) { if (oldId.isComplete() && db != null) - oldId = oldId.toObjectId().abbreviate(db, 8); + oldId = oldId.toObjectId().abbreviate(db, abbreviationLength); return oldId.name(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index c01cb7ad8..574d2edf9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -65,6 +65,25 @@ * they are converting from "line number" to "element index". */ public class RawText implements Sequence { + /** Creates a RawText instance. */ + public static interface Factory { + /** + * Construct a RawText instance for the content. + * + * @param input + * the content array. + * @return a RawText instance wrapping this content. + */ + RawText create(byte[] input); + } + + /** Creates RawText that does not treat whitespace specially. */ + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawText(input); + } + }; + /** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */ private static final int FIRST_FEW_BYTES = 8000; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java index f72259605..211618a3f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java @@ -51,6 +51,13 @@ * A version of {@link RawText} that ignores all whitespace. */ public class RawTextIgnoreAllWhitespace extends RawText { + /** Creates RawText that ignores all whitespace. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreAllWhitespace(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java index 1d928766a..23778973b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreLeadingWhitespace.java @@ -50,6 +50,13 @@ * A version of {@link RawText} that ignores leading whitespace. */ public class RawTextIgnoreLeadingWhitespace extends RawText { + /** Creates RawText that ignores only leading whitespace. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreLeadingWhitespace(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java index b9095940c..3feb2e783 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreTrailingWhitespace.java @@ -50,6 +50,13 @@ * A version of {@link RawText} that ignores trailing whitespace. */ public class RawTextIgnoreTrailingWhitespace extends RawText { + /** Creates RawText that ignores only trailing whitespace. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreTrailingWhitespace(input); + } + }; /** * Create a new sequence from an existing content byte array. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java index 399f038bc..e6bd8e98b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreWhitespaceChange.java @@ -45,14 +45,21 @@ package org.eclipse.jgit.diff; import static org.eclipse.jgit.util.RawCharUtil.isWhitespace; -import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; /** * A version of {@link RawText} that ignores changes in the amount of * whitespace, as well as trailing whitespace. */ public class RawTextIgnoreWhitespaceChange extends RawText { + /** Creates RawText that ignores only whitespace changes. */ + @SuppressWarnings("hiding") + public static final Factory FACTORY = new Factory() { + public RawText create(byte[] input) { + return new RawTextIgnoreWhitespaceChange(input); + } + }; /** * Create a new sequence from an existing content byte array. From e9de5643fad6abdfe081d1a11416f2e4fa669038 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jul 2010 18:16:20 -0700 Subject: [PATCH 08/17] Cache the diff configuration section This way we don't have to reparse for the rename limit every time we create a new rename detector for a repository. Change-Id: I669d031690b85ef4da5e39189be7173fb773fc56 Signed-off-by: Shawn O. Pearce --- .../src/org/eclipse/jgit/diff/DiffConfig.java | 68 +++++++++++++++++++ .../org/eclipse/jgit/diff/RenameDetector.java | 5 +- 2 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java new file mode 100644 index 000000000..91b7467ae --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffConfig.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.lib.Config; +import org.eclipse.jgit.lib.Config.SectionParser; + +/** Keeps track of diff related configuration options. */ +public class DiffConfig { + /** Key for {@link Config#get(SectionParser)}. */ + public static final Config.SectionParser KEY = new SectionParser() { + public DiffConfig parse(final Config cfg) { + return new DiffConfig(cfg); + } + }; + + private final int renameLimit; + + private DiffConfig(final Config rc) { + renameLimit = rc.getInt("diff", "renamelimit", 200); + } + + /** @return limit on number of paths to perform inexact rename detection. */ + public int getRenameLimit() { + return renameLimit; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 8116e87ef..87a67a79c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -54,7 +54,6 @@ import org.eclipse.jgit.JGitText; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.AbbreviatedObjectId; -import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ProgressMonitor; @@ -126,8 +125,8 @@ private int sortOf(ChangeType changeType) { public RenameDetector(Repository repo) { this.repo = repo; - Config cfg = repo.getConfig(); - renameLimit = cfg.getInt("diff", "renamelimit", 200); + DiffConfig cfg = repo.getConfig().get(DiffConfig.KEY); + renameLimit = cfg.getRenameLimit(); } /** From 1913b41bc749ed8a176d7ea8100fb4b260547523 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 3 Jul 2010 18:13:10 -0700 Subject: [PATCH 09/17] log: Implement --follow The FollowFilter can be installed on a RevWalk to cause the path to be updated through rename detection when the affected file is found to be added to the project. The filter works reasonably well, for example we can follow the history of the fsck command in git-core: $ jgit log --name-status --follow builtin/fsck.c | grep ^R R100 builtin-fsck.c builtin/fsck.c R099 fsck.c builtin-fsck.c R099 fsck-objects.c fsck.c R099 fsck-cache.c fsck-objects.c Change-Id: I4017bcfd150126aa342fdd423a688493ca660a1f Signed-off-by: Shawn O. Pearce --- .../src/org/eclipse/jgit/pgm/Log.java | 63 +++++++-- .../eclipse/jgit/pgm/RevWalkTextBuiltin.java | 10 +- .../eclipse/jgit/revwalk/FollowFilter.java | 120 ++++++++++++++++++ .../jgit/revwalk/RewriteTreeFilter.java | 46 ++++++- .../jgit/treewalk/filter/PathFilter.java | 7 +- 5 files changed, 233 insertions(+), 13 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java index 83ef6eb87..48a05915e 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Log.java @@ -51,6 +51,7 @@ import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -65,11 +66,12 @@ import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace; import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange; import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; -import org.eclipse.jgit.lib.TextProgressMonitor; +import org.eclipse.jgit.revwalk.FollowFilter; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.treewalk.TreeWalk; @@ -195,20 +197,26 @@ protected void show(final RevCommit c) throws Exception { private void showDiff(RevCommit c) throws IOException { final TreeWalk tw = new TreeWalk(db); - tw.reset(); tw.setRecursive(true); + tw.reset(); tw.addTree(c.getParent(0).getTree()); tw.addTree(c.getTree()); - tw.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter)); + tw.setFilter(AndTreeFilter.create(pathFilter, TreeFilter.ANY_DIFF)); List files = DiffEntry.scan(tw); - if (detectRenames) { - RenameDetector rd = new RenameDetector(db); - if (renameLimit != null) - rd.setRenameLimit(renameLimit.intValue()); - rd.addAll(files); - files = rd.compute(new TextProgressMonitor()); - } + if (pathFilter instanceof FollowFilter && isAdd(files)) { + // The file we are following was added here, find where it + // came from so we can properly show the rename or copy, + // then continue digging backwards. + // + tw.reset(); + tw.addTree(c.getParent(0).getTree()); + tw.addTree(c.getTree()); + tw.setFilter(TreeFilter.ANY_DIFF); + files = updateFollowFilter(detectRenames(DiffEntry.scan(tw))); + + } else if (detectRenames) + files = detectRenames(files); if (showNameAndStatusOnly) { Diff.nameStatus(out, files); @@ -220,4 +228,39 @@ private void showDiff(RevCommit c) throws IOException { } out.println(); } + + private List detectRenames(List files) + throws IOException { + RenameDetector rd = new RenameDetector(db); + if (renameLimit != null) + rd.setRenameLimit(renameLimit.intValue()); + rd.addAll(files); + return rd.compute(); + } + + private boolean isAdd(List files) { + String oldPath = ((FollowFilter) pathFilter).getPath(); + for (DiffEntry ent : files) { + if (ent.getChangeType() == ChangeType.ADD + && ent.getNewName().equals(oldPath)) + return true; + } + return false; + } + + private List updateFollowFilter(List files) { + String oldPath = ((FollowFilter) pathFilter).getPath(); + for (DiffEntry ent : files) { + if (isRename(ent) && ent.getNewName().equals(oldPath)) { + pathFilter = FollowFilter.create(ent.getOldName()); + return Collections.singletonList(ent); + } + } + return Collections.emptyList(); + } + + private static boolean isRename(DiffEntry ent) { + return ent.getChangeType() == ChangeType.RENAME + || ent.getChangeType() == ChangeType.COPY; + } } diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java index beb961d99..bf3924b70 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/RevWalkTextBuiltin.java @@ -53,6 +53,7 @@ import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; +import org.eclipse.jgit.revwalk.FollowFilter; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevFlag; @@ -110,6 +111,11 @@ void enableBoundary(final boolean on) { enableRevSort(RevSort.BOUNDARY, on); } + @Option(name = "--follow", metaVar = "metaVar_path") + void follow(final String path) { + pathFilter = FollowFilter.create(path); + } + @Argument(index = 0, metaVar = "metaVar_commitish") private final List commits = new ArrayList(); @@ -139,7 +145,9 @@ protected void run() throws Exception { for (final RevSort s : sorting) walk.sort(s, true); - if (pathFilter != TreeFilter.ALL) + if (pathFilter instanceof FollowFilter) + walk.setTreeFilter(pathFilter); + else if (pathFilter != TreeFilter.ALL) walk.setTreeFilter(AndTreeFilter.create(pathFilter, TreeFilter.ANY_DIFF)); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java new file mode 100644 index 000000000..953620617 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.revwalk; + +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.treewalk.filter.PathFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter; + +/** + * Updates the internal path filter to follow copy/renames. + *

+ * This is a special filter that performs {@code AND(path, ANY_DIFF)}, but also + * triggers rename detection so that the path node is updated to include a prior + * file name as the RevWalk traverses history. + *

+ * Results with this filter are unpredictable if the path being followed is a + * subdirectory. + */ +public class FollowFilter extends TreeFilter { + /** + * Create a new tree filter for a user supplied path. + *

+ * Path strings are relative to the root of the repository. If the user's + * input should be assumed relative to a subdirectory of the repository the + * caller must prepend the subdirectory's path prior to creating the filter. + *

+ * Path strings use '/' to delimit directories on all platforms. + * + * @param path + * the path to filter on. Must not be the empty string. All + * trailing '/' characters will be trimmed before string's length + * is checked or is used as part of the constructed filter. + * @return a new filter for the requested path. + * @throws IllegalArgumentException + * the path supplied was the empty string. + */ + public static FollowFilter create(String path) { + return new FollowFilter(PathFilter.create(path)); + } + + private final PathFilter path; + + FollowFilter(final PathFilter path) { + this.path = path; + } + + /** @return the path this filter matches. */ + public String getPath() { + return path.getPath(); + } + + @Override + public boolean include(final TreeWalk walker) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + return path.include(walker) && ANY_DIFF.include(walker); + } + + @Override + public boolean shouldBeRecursive() { + return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive(); + } + + @Override + public TreeFilter clone() { + return new FollowFilter(path.clone()); + } + + @Override + public String toString() { + return "(FOLLOW(" + path.toString() + ")" // + + " AND " // + + ANY_DIFF.toString() + ")"; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java index d7e5c8028..4c5a2a77e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RewriteTreeFilter.java @@ -44,11 +44,17 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; +import java.util.List; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StopWalkException; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.filter.RevFilter; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.filter.TreeFilter; @@ -76,8 +82,11 @@ class RewriteTreeFilter extends RevFilter { private final TreeWalk pathFilter; + private final Repository repo; + RewriteTreeFilter(final RevWalk walker, final TreeFilter t) { - pathFilter = new TreeWalk(walker.db); + repo = walker.db; + pathFilter = new TreeWalk(repo); pathFilter.setFilter(t); pathFilter.setRecursive(t.shouldBeRecursive()); } @@ -128,6 +137,13 @@ public boolean include(final RevWalk walker, final RevCommit c) // We have interesting items, but neither of the special // cases denoted above. // + if (adds > 0 && tw.getFilter() instanceof FollowFilter) { + // One of the paths we care about was added in this + // commit. We need to update our filter to its older + // name, if we can discover it. Find out what that is. + // + updateFollowFilter(trees); + } return true; } } else if (nParents == 0) { @@ -213,4 +229,32 @@ public boolean include(final RevWalk walker, final RevCommit c) c.flags |= REWRITE; return false; } + + private void updateFollowFilter(ObjectId[] trees) + throws MissingObjectException, IncorrectObjectTypeException, + CorruptObjectException, IOException { + TreeWalk tw = pathFilter; + FollowFilter oldFilter = (FollowFilter) tw.getFilter(); + tw.setFilter(TreeFilter.ANY_DIFF); + tw.reset(trees); + + List files = DiffEntry.scan(tw); + RenameDetector rd = new RenameDetector(repo); + rd.addAll(files); + files = rd.compute(); + + TreeFilter newFilter = oldFilter; + for (DiffEntry ent : files) { + if (isRename(ent) && ent.getNewName().equals(oldFilter.getPath())) { + newFilter = FollowFilter.create(ent.getOldName()); + break; + } + } + tw.setFilter(newFilter); + } + + private static boolean isRename(DiffEntry ent) { + return ent.getChangeType() == ChangeType.RENAME + || ent.getChangeType() == ChangeType.COPY; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java index e31778984..3f83114fd 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java @@ -90,6 +90,11 @@ private PathFilter(final String s) { pathRaw = Constants.encode(pathStr); } + /** @return the path this filter matches. */ + public String getPath() { + return pathStr; + } + @Override public boolean include(final TreeWalk walker) { return walker.isPathPrefix(pathRaw, pathRaw.length) == 0; @@ -104,7 +109,7 @@ public boolean shouldBeRecursive() { } @Override - public TreeFilter clone() { + public PathFilter clone() { return this; } From a8b29afd82795b3d98b42bf214fea27ab61984cc Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Thu, 8 Jul 2010 09:59:36 -0700 Subject: [PATCH 10/17] Create FileHeader from DiffEntry Added support for converting DiffEntrys to FileHeaders. FileHeaders are DiffEntrys with a buffer containing the diff output as well as a list of HunkHeaders. The HunkHeaders contain EditLists. The createFileHeader(DiffEntry) method in DiffFormatter performs a Myers Diff on the files refered to by the DiffEntry, then puts the returned EditList into a single HunkHeader, which is then put into the FileHeader to be returned. It also generates the appropriate diff header an puts it into the FileHeader's buffer. The rest of the diff output, which would normally be parsed to generate the HunkHeaders, is not generated. In fact, the purpose of this method is to avoid the costly diff output generation and parsing normally required to create a FileHeader. Change-Id: I7d8b18c0f6c85e3d02ad58995d3d231e69af5887 --- .../eclipse/jgit/diff/DiffFormatterTest.java | 186 ++++++++++++++ .../src/org/eclipse/jgit/diff/DiffEntry.java | 7 + .../org/eclipse/jgit/diff/DiffFormatter.java | 226 ++++++++++++------ .../org/eclipse/jgit/patch/FileHeader.java | 19 ++ .../org/eclipse/jgit/patch/HunkHeader.java | 93 ++++--- .../src/org/eclipse/jgit/patch/Patch.java | 2 +- 6 files changed, 416 insertions(+), 117 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java new file mode 100644 index 000000000..996ee35a1 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffFormatterTest.java @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.RepositoryTestCase; +import org.eclipse.jgit.patch.FileHeader; +import org.eclipse.jgit.patch.HunkHeader; +import org.eclipse.jgit.util.RawParseUtils; +import org.eclipse.jgit.util.io.DisabledOutputStream; + +public class DiffFormatterTest extends RepositoryTestCase { + private static final String DIFF = "diff --git "; + + private static final String REGULAR_FILE = "100644"; + + private static final String GITLINK = "160000"; + + private static final String PATH_A = "src/a"; + + private static final String PATH_B = "src/b"; + + private DiffFormatter df; + + private TestRepository testDb; + + @Override + public void setUp() throws Exception { + super.setUp(); + testDb = new TestRepository(db); + df = new DiffFormatter(DisabledOutputStream.INSTANCE); + df.setRepository(db); + } + + public void testCreateFileHeader_Modify() throws Exception { + ObjectId adId = blob("a\nd\n"); + ObjectId abcdId = blob("a\nb\nc\nd\n"); + + String diffHeader = makeDiffHeader(PATH_A, PATH_A, adId, abcdId); + + DiffEntry ad = DiffEntry.delete(PATH_A, adId); + DiffEntry abcd = DiffEntry.add(PATH_A, abcdId); + + DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); + + FileHeader fh = df.createFileHeader(mod); + + assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); + assertEquals(0, fh.getStartOffset()); + assertEquals(fh.getBuffer().length, fh.getEndOffset()); + assertEquals(FileHeader.PatchType.UNIFIED, fh.getPatchType()); + + assertEquals(1, fh.getHunks().size()); + + HunkHeader hh = fh.getHunks().get(0); + assertEquals(1, hh.toEditList().size()); + + EditList el = hh.toEditList(); + assertEquals(1, el.size()); + + Edit e = el.get(0); + assertEquals(1, e.getBeginA()); + assertEquals(1, e.getEndA()); + assertEquals(1, e.getBeginB()); + assertEquals(3, e.getEndB()); + assertEquals(Edit.Type.INSERT, e.getType()); + } + + public void testCreateFileHeader_Binary() throws Exception { + ObjectId adId = blob("a\nd\n"); + ObjectId binId = blob("a\nb\nc\n\0\0\0\0d\n"); + + String diffHeader = makeDiffHeader(PATH_A, PATH_B, adId, binId) + + "Binary files differ\n"; + + DiffEntry ad = DiffEntry.delete(PATH_A, adId); + DiffEntry abcd = DiffEntry.add(PATH_B, binId); + + DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); + + FileHeader fh = df.createFileHeader(mod); + + assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); + assertEquals(FileHeader.PatchType.BINARY, fh.getPatchType()); + + assertEquals(1, fh.getHunks().size()); + + HunkHeader hh = fh.getHunks().get(0); + assertEquals(0, hh.toEditList().size()); + } + + public void testCreateFileHeader_GitLink() throws Exception { + ObjectId aId = blob("a\n"); + ObjectId bId = blob("b\n"); + + String diffHeader = makeDiffHeaderModeChange(PATH_A, PATH_A, aId, bId, + GITLINK, REGULAR_FILE) + + "-Subproject commit " + aId.name() + "\n"; + + DiffEntry ad = DiffEntry.delete(PATH_A, aId); + ad.oldMode = FileMode.GITLINK; + DiffEntry abcd = DiffEntry.add(PATH_A, bId); + + DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); + + FileHeader fh = df.createFileHeader(mod); + + assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); + + assertEquals(1, fh.getHunks().size()); + + HunkHeader hh = fh.getHunks().get(0); + assertEquals(0, hh.toEditList().size()); + } + + private String makeDiffHeader(String pathA, String pathB, ObjectId aId, + ObjectId bId) { + String a = aId.abbreviate(db).name(); + String b = bId.abbreviate(db).name(); + return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + // + "index " + a + ".." + b + " " + REGULAR_FILE + "\n" + // + "--- a/" + pathA + "\n" + // + "+++ b/" + pathB + "\n"; + } + + private String makeDiffHeaderModeChange(String pathA, String pathB, + ObjectId aId, ObjectId bId, String modeA, String modeB) { + String a = aId.abbreviate(db).name(); + String b = bId.abbreviate(db).name(); + return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + // + "old mode " + modeA + "\n" + // + "new mode " + modeB + "\n" + // + "index " + a + ".." + b + "\n" + // + "--- a/" + pathA + "\n" + // + "+++ b/" + pathB + "\n"; + } + + private ObjectId blob(String content) throws Exception { + return testDb.blob(content).copy(); + } + +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java index e41ec5670..4d25dbd02 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java @@ -81,6 +81,13 @@ public static enum ChangeType { COPY; } + /** + * Create an empty DiffEntry + */ + protected DiffEntry(){ + // reduce the visibility of the default constructor + } + /** * Convert the TreeWalk into DiffEntry headers. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index c12c54b05..cdcc5e63e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -48,18 +48,24 @@ import static org.eclipse.jgit.lib.Constants.encodeASCII; import static org.eclipse.jgit.lib.FileMode.GITLINK; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.List; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.patch.FileHeader; +import org.eclipse.jgit.patch.HunkHeader; +import org.eclipse.jgit.patch.FileHeader.PatchType; import org.eclipse.jgit.util.QuotedString; +import org.eclipse.jgit.util.io.DisabledOutputStream; /** * Format an {@link EditList} as a Git style unified patch script. @@ -185,87 +191,10 @@ public void format(List entries) throws IOException { * be written to. */ public void format(DiffEntry ent) throws IOException { - String oldName = quotePath("a/" + ent.getOldName()); - String newName = quotePath("b/" + ent.getNewName()); - out.write(encode("diff --git " + oldName + " " + newName + "\n")); - - switch (ent.getChangeType()) { - case ADD: - out.write(encodeASCII("new file mode ")); - ent.getNewMode().copyTo(out); - out.write('\n'); - break; - - case DELETE: - out.write(encodeASCII("deleted file mode ")); - ent.getOldMode().copyTo(out); - out.write('\n'); - break; - - case RENAME: - out.write(encodeASCII("similarity index " + ent.getScore() + "%")); - out.write('\n'); - - out.write(encode("rename from " + quotePath(ent.getOldName()))); - out.write('\n'); - - out.write(encode("rename to " + quotePath(ent.getNewName()))); - out.write('\n'); - break; - - case COPY: - out.write(encodeASCII("similarity index " + ent.getScore() + "%")); - out.write('\n'); - - out.write(encode("copy from " + quotePath(ent.getOldName()))); - out.write('\n'); - - out.write(encode("copy to " + quotePath(ent.getNewName()))); - out.write('\n'); - - if (!ent.getOldMode().equals(ent.getNewMode())) { - out.write(encodeASCII("new file mode ")); - ent.getNewMode().copyTo(out); - out.write('\n'); - } - break; - } - - switch (ent.getChangeType()) { - case RENAME: - case MODIFY: - if (!ent.getOldMode().equals(ent.getNewMode())) { - out.write(encodeASCII("old mode ")); - ent.getOldMode().copyTo(out); - out.write('\n'); - - out.write(encodeASCII("new mode ")); - ent.getNewMode().copyTo(out); - out.write('\n'); - } - } - - out.write(encodeASCII("index " // - + format(ent.getOldId()) // - + ".." // - + format(ent.getNewId()))); - if (ent.getOldMode().equals(ent.getNewMode())) { - out.write(' '); - ent.getNewMode().copyTo(out); - } - out.write('\n'); - out.write(encode("--- " + oldName + '\n')); - out.write(encode("+++ " + newName + '\n')); + writeDiffHeader(out, ent); if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { - if (ent.getOldMode() == GITLINK) { - out.write(encodeASCII("-Subproject commit " - + ent.getOldId().name() + "\n")); - } - if (ent.getNewMode() == GITLINK) { - out.write(encodeASCII("+Subproject commit " - + ent.getNewId().name() + "\n")); - } + writeGitLinkDiffText(out, ent); } else { byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); @@ -281,6 +210,93 @@ public void format(DiffEntry ent) throws IOException { } } + private void writeGitLinkDiffText(OutputStream o, DiffEntry ent) + throws IOException { + if (ent.getOldMode() == GITLINK) { + o.write(encodeASCII("-Subproject commit " + ent.getOldId().name() + + "\n")); + } + if (ent.getNewMode() == GITLINK) { + o.write(encodeASCII("+Subproject commit " + ent.getNewId().name() + + "\n")); + } + } + + private void writeDiffHeader(OutputStream o, DiffEntry ent) + throws IOException { + String oldName = quotePath("a/" + ent.getOldName()); + String newName = quotePath("b/" + ent.getNewName()); + o.write(encode("diff --git " + oldName + " " + newName + "\n")); + + switch (ent.getChangeType()) { + case ADD: + o.write(encodeASCII("new file mode ")); + ent.getNewMode().copyTo(o); + o.write('\n'); + break; + + case DELETE: + o.write(encodeASCII("deleted file mode ")); + ent.getOldMode().copyTo(o); + o.write('\n'); + break; + + case RENAME: + o.write(encodeASCII("similarity index " + ent.getScore() + "%")); + o.write('\n'); + + o.write(encode("rename from " + quotePath(ent.getOldName()))); + o.write('\n'); + + o.write(encode("rename to " + quotePath(ent.getNewName()))); + o.write('\n'); + break; + + case COPY: + o.write(encodeASCII("similarity index " + ent.getScore() + "%")); + o.write('\n'); + + o.write(encode("copy from " + quotePath(ent.getOldName()))); + o.write('\n'); + + o.write(encode("copy to " + quotePath(ent.getNewName()))); + o.write('\n'); + + if (!ent.getOldMode().equals(ent.getNewMode())) { + o.write(encodeASCII("new file mode ")); + ent.getNewMode().copyTo(o); + o.write('\n'); + } + break; + } + + switch (ent.getChangeType()) { + case RENAME: + case MODIFY: + if (!ent.getOldMode().equals(ent.getNewMode())) { + o.write(encodeASCII("old mode ")); + ent.getOldMode().copyTo(o); + o.write('\n'); + + o.write(encodeASCII("new mode ")); + ent.getNewMode().copyTo(o); + o.write('\n'); + } + } + + o.write(encodeASCII("index " // + + format(ent.getOldId()) // + + ".." // + + format(ent.getNewId()))); + if (ent.getOldMode().equals(ent.getNewMode())) { + o.write(' '); + ent.getNewMode().copyTo(o); + } + o.write('\n'); + o.write(encode("--- " + oldName + '\n')); + o.write(encode("+++ " + newName + '\n')); + } + private String format(AbbreviatedObjectId oldId) { if (oldId.isComplete() && db != null) oldId = oldId.toObjectId().abbreviate(db, abbreviationLength); @@ -513,6 +529,58 @@ protected void writeLine(final char prefix, final RawText text, out.write('\n'); } + /** + * Creates a {@link FileHeader} representing the given {@link DiffEntry} + *

+ * This method does not use the OutputStream associated with this + * DiffFormatter instance. It is therefore safe to instantiate this + * DiffFormatter instance with a {@link DisabledOutputStream} if this method + * is the only one that will be used. + * + * @param ent + * the DiffEntry to create the FileHeader for + * @return a FileHeader representing the DiffEntry. The FileHeader's buffer + * will contain only the header of the diff output. It will also + * contain one {@link HunkHeader}. + * @throws IOException + * the stream threw an exception while writing to it, or one of + * the blobs referenced by the DiffEntry could not be read. + * @throws CorruptObjectException + * one of the blobs referenced by the DiffEntry is corrupt. + * @throws MissingObjectException + * one of the blobs referenced by the DiffEntry is missing. + */ + public FileHeader createFileHeader(DiffEntry ent) throws IOException, + CorruptObjectException, MissingObjectException { + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + final EditList editList; + final FileHeader.PatchType type; + + writeDiffHeader(buf, ent); + + if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) { + writeGitLinkDiffText(buf, ent); + editList = new EditList(); + type = PatchType.UNIFIED; + } else { + byte[] aRaw = open(ent.getOldMode(), ent.getOldId()); + byte[] bRaw = open(ent.getNewMode(), ent.getNewId()); + + if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + buf.write(encodeASCII("Binary files differ\n")); + editList = new EditList(); + type = PatchType.BINARY; + } else { + RawText a = rawTextFactory.create(aRaw); + RawText b = rawTextFactory.create(bRaw); + editList = new MyersDiff(a, b).getEdits(); + type = PatchType.UNIFIED; + } + } + + return new FileHeader(buf.toByteArray(), editList, type); + } + private int findCombinedEnd(final List edits, final int i) { int end = i + 1; while (end < edits.size() diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java index f2f2f2e74..0c24fc6be 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/FileHeader.java @@ -134,6 +134,25 @@ public static enum PatchType { /** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */ BinaryHunk reverseBinaryHunk; + /** + * Constructs a new FileHeader + * + * @param headerLines + * buffer holding the diff header for this file + * @param edits + * the edits for this file + * @param type + * the type of patch used to modify this file + */ + public FileHeader(final byte[] headerLines, EditList edits, PatchType type) { + this(headerLines, 0); + endOffset = headerLines.length; + int ptr = parseGitFileName(Patch.DIFF_GIT.length, headerLines.length); + ptr = parseGitHeaders(ptr, headerLines.length); + this.patchType = type; + addHunk(new HunkHeader(this, edits)); + } + FileHeader(final byte[] b, final int offset) { buf = b; startOffset = offset; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java index bfb20b64e..a6ea74eb1 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/HunkHeader.java @@ -116,6 +116,8 @@ public int getLinesAdded() { /** Total number of lines of context appearing in this hunk */ int nContext; + private EditList editList; + HunkHeader(final FileHeader fh, final int offset) { this(fh, offset, new OldImage() { @Override @@ -131,6 +133,21 @@ public AbbreviatedObjectId getId() { old = oi; } + HunkHeader(final FileHeader fh, final EditList editList) { + this(fh, fh.buf.length); + this.editList = editList; + endOffset = startOffset; + nContext = 0; + if (editList.isEmpty()) { + newStartLine = 0; + newLineCount = 0; + } else { + newStartLine = editList.get(0).getBeginB(); + Edit last = editList.get(editList.size() - 1); + newLineCount = last.getEndB() - newStartLine; + } + } + /** @return header for the file this hunk applies to */ public FileHeader getFileHeader() { return file; @@ -173,48 +190,50 @@ public int getLinesContext() { /** @return a list describing the content edits performed within the hunk. */ public EditList toEditList() { - final EditList r = new EditList(); - final byte[] buf = file.buf; - int c = nextLF(buf, startOffset); - int oLine = old.startLine; - int nLine = newStartLine; - Edit in = null; + if (editList == null) { + editList = new EditList(); + final byte[] buf = file.buf; + int c = nextLF(buf, startOffset); + int oLine = old.startLine; + int nLine = newStartLine; + Edit in = null; - SCAN: for (; c < endOffset; c = nextLF(buf, c)) { - switch (buf[c]) { - case ' ': - case '\n': - in = null; - oLine++; - nLine++; - continue; + SCAN: for (; c < endOffset; c = nextLF(buf, c)) { + switch (buf[c]) { + case ' ': + case '\n': + in = null; + oLine++; + nLine++; + continue; - case '-': - if (in == null) { - in = new Edit(oLine - 1, nLine - 1); - r.add(in); + case '-': + if (in == null) { + in = new Edit(oLine - 1, nLine - 1); + editList.add(in); + } + oLine++; + in.extendA(); + continue; + + case '+': + if (in == null) { + in = new Edit(oLine - 1, nLine - 1); + editList.add(in); + } + nLine++; + in.extendB(); + continue; + + case '\\': // Matches "\ No newline at end of file" + continue; + + default: + break SCAN; } - oLine++; - in.extendA(); - continue; - - case '+': - if (in == null) { - in = new Edit(oLine - 1, nLine - 1); - r.add(in); - } - nLine++; - in.extendB(); - continue; - - case '\\': // Matches "\ No newline at end of file" - continue; - - default: - break SCAN; } } - return r; + return editList; } void parseHeader() { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java b/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java index ce006dadb..cf42b665a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/patch/Patch.java @@ -60,7 +60,7 @@ /** A parsed collection of {@link FileHeader}s from a unified diff patch file */ public class Patch { - private static final byte[] DIFF_GIT = encodeASCII("diff --git "); + static final byte[] DIFF_GIT = encodeASCII("diff --git "); private static final byte[] DIFF_CC = encodeASCII("diff --cc "); From 64b9458640bafadc70028c99bb038480309e568d Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Fri, 9 Jul 2010 11:18:50 -0700 Subject: [PATCH 11/17] Added file size based rename detection optimization Prior to this change, files that were very different in size (enough so that they could not have enough in common to be detected as renames) were still having their scores calculated. I added an optimization to skip such files. For example, if the rename detection threshold is 60%, the larger file is 200kb, and the smaller file is 50kb, the pair cannot be counted as a rename since they cannot possibly share 60% of their content in common. (200*.6=120, 120>50) Change-Id: Icd8315412d5de6292839778e7cea7fe6f061b0fc --- .../jgit/diff/SimilarityRenameDetector.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java index c92b1e3bb..a343fc062 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java @@ -205,6 +205,14 @@ private int buildMatrix(ProgressMonitor pm) throws IOException { // matrix = new long[srcs.size() * dsts.size()]; + long[] srcSizes = new long[srcs.size()]; + long[] dstSizes = new long[dsts.size()]; + + // Init the size arrays to some value that indicates that we haven't + // calculated the size yet. Since sizes cannot be negative, -1 will work + Arrays.fill(srcSizes, -1); + Arrays.fill(dstSizes, -1); + // Consider each pair of files, if the score is above the minimum // threshold we need record that scoring in the matrix so we can // later find the best matches. @@ -231,6 +239,26 @@ private int buildMatrix(ProgressMonitor pm) throws IOException { continue; } + long srcSize = srcSizes[srcIdx]; + if (srcSize < 0) { + srcSize = size(srcEnt.oldId.toObjectId()); + srcSizes[srcIdx] = srcSize; + } + + long dstSize = dstSizes[dstIdx]; + if (dstSize < 0) { + dstSize = size(dstEnt.newId.toObjectId()); + dstSizes[dstIdx] = dstSize; + } + + long max = Math.max(srcSize, dstSize); + long min = Math.min(srcSize, dstSize); + if (min * 100 / max < renameScore) { + // Cannot possibly match, as the file sizes are so different + pm.update(1); + continue; + } + SimilarityIndex d = hash(dstEnt.newId.toObjectId()); int score = s.score(d); @@ -259,6 +287,10 @@ private SimilarityIndex hash(ObjectId objectId) throws IOException { return r; } + private long size(ObjectId objectId) throws IOException { + return repo.openObject(objectId).getSize(); + } + private static int score(long value) { return (int) (value >>> SCORE_SHIFT); } From 4c14b7869d47a28aca29e219a3bae10f43083b33 Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Fri, 9 Jul 2010 12:53:57 -0700 Subject: [PATCH 12/17] Fixed potential div by zero bug The scoring logic in SimilarityIndex was dividing by the max file size. If both files are empty, this would cause a div by zero error. This case cannot currently happen, since two empty files would have the same SHA1, and would therefore be caught in the earlier SHA1 based detection pass. Still, if this logic eventually gets separated from that pass, a div by zero error would occur. I changed the logic to instead consider two empty files to have a similarity score of 100. Change-Id: Ic08e18a066b8fef25bb5e7c62418106a8cee762a --- org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java index d6026b5be..f4cccfc37 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -144,6 +144,8 @@ void sort() { int score(SimilarityIndex dst) { long max = Math.max(fileSize, dst.fileSize); + if (max == 0) + return 100; return (int) ((common(dst) * 100L) / max); } From 9a48de86d892a3a1f028f4899f34898e58d759b3 Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Fri, 9 Jul 2010 15:11:54 -0700 Subject: [PATCH 13/17] Added file path similarity to scoring metric in rename detection The scoring method was not taking into account the similarity of the file paths and file names. I changed the metric so that it is 99% based on content (which used to be 100% of the old metric), and 1% based on path similarity. Of that 1%, half (.5% of the total final score) is based on the actual file names (e.g. "foo.java"), and half on the directory (e.g. "src/com/foo/bar/"). Change-Id: I94f0c23bf6413c491b10d5625f6ad7d2ecfb4def --- .../eclipse/jgit/diff/RenameDetectorTest.java | 12 ++-- .../jgit/diff/SimilarityIndexTest.java | 8 +-- .../eclipse/jgit/diff/SimilarityIndex.java | 6 +- .../jgit/diff/SimilarityRenameDetector.java | 56 ++++++++++++++++++- 4 files changed, 68 insertions(+), 14 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java index c4cb600db..fe0c565d6 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -124,8 +124,8 @@ public void testExactRename_ManyRenames() throws Exception { } public void testInexactRename_OnePair() throws Exception { - ObjectId aId = blob("foo\nbar\nbaz\n"); - ObjectId bId = blob("foo\nbar\nblah\n"); + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); DiffEntry a = DiffEntry.add(PATH_A, aId); DiffEntry b = DiffEntry.delete(PATH_Q, bId); @@ -135,12 +135,12 @@ public void testInexactRename_OnePair() throws Exception { List entries = rd.compute(); assertEquals(1, entries.size()); - assertRename(b, a, 61, entries.get(0)); + assertRename(b, a, 66, entries.get(0)); } public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { - ObjectId aId = blob("foo\nbar\nbaz\n"); - ObjectId bId = blob("foo\nbar\nblah\n"); + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); DiffEntry a = DiffEntry.add(PATH_A, aId); DiffEntry b = DiffEntry.delete(PATH_Q, bId); @@ -158,7 +158,7 @@ public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { assertEquals(3, entries.size()); assertSame(c, entries.get(0)); assertSame(d, entries.get(1)); - assertRename(b, a, 61, entries.get(2)); + assertRename(b, a, 66, entries.get(2)); } public void testInexactRename_LastByteDifferent() throws Exception { diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java index 9ab745fac..d6915eb87 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java @@ -78,8 +78,8 @@ public void testCommonScore_SameFiles() { assertEquals(8, src.common(dst)); assertEquals(8, dst.common(src)); - assertEquals(100, src.score(dst)); - assertEquals(100, dst.score(src)); + assertEquals(100, src.score(dst, 100)); + assertEquals(100, dst.score(src, 100)); } public void testCommonScore_EmptyFiles() { @@ -102,8 +102,8 @@ public void testCommonScore_SimiliarBy75() { assertEquals(6, src.common(dst)); assertEquals(6, dst.common(src)); - assertEquals(75, src.score(dst)); - assertEquals(75, dst.score(src)); + assertEquals(75, src.score(dst, 100)); + assertEquals(75, dst.score(src, 100)); } private static SimilarityIndex hash(String text) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java index f4cccfc37..d5a31d604 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -142,11 +142,11 @@ void sort() { Arrays.sort(idHash); } - int score(SimilarityIndex dst) { + int score(SimilarityIndex dst, int maxScore) { long max = Math.max(fileSize, dst.fileSize); if (max == 0) - return 100; - return (int) ((common(dst) * 100L) / max); + return maxScore; + return (int) ((common(dst) * maxScore) / max); } int common(SimilarityIndex dst) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java index a343fc062..6590f746f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java @@ -260,7 +260,14 @@ private int buildMatrix(ProgressMonitor pm) throws IOException { } SimilarityIndex d = hash(dstEnt.newId.toObjectId()); - int score = s.score(d); + int contentScore = s.score(d, 10000); + + // nameScore returns a value between 0 and 100, but we want it + // to be in the same range as the content score. This allows it + // to be dropped into the pretty formula for the final score. + int nameScore = nameScore(srcEnt.oldName, dstEnt.newName) * 100; + + int score = (contentScore * 99 + nameScore * 1) / 10000; if (score < renameScore) { pm.update(1); @@ -280,6 +287,53 @@ private int buildMatrix(ProgressMonitor pm) throws IOException { return mNext; } + private int nameScore(String a, String b) { + int aDirLen = a.lastIndexOf("/") + 1; + int bDirLen = b.lastIndexOf("/") + 1; + + int dirMin = Math.min(aDirLen, bDirLen); + int dirMax = Math.max(aDirLen, bDirLen); + + final int dirScoreLtr; + final int dirScoreRtl; + + if (dirMax == 0) { + dirScoreLtr = 100; + dirScoreRtl = 100; + } else { + int dirSim = 0; + for (; dirSim < dirMin; dirSim++) { + if (a.charAt(dirSim) != b.charAt(dirSim)) + break; + } + dirScoreLtr = (dirSim * 100) / dirMax; + + if (dirScoreLtr == 100) { + dirScoreRtl = 100; + } else { + for (dirSim = 0; dirSim < dirMin; dirSim++) { + if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1 + - dirSim)) + break; + } + dirScoreRtl = (dirSim * 100) / dirMax; + } + } + + int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen); + int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen); + + int fileSim = 0; + for (; fileSim < fileMin; fileSim++) { + if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1 + - fileSim)) + break; + } + int fileScore = (fileSim * 100) / fileMax; + + return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100; + } + private SimilarityIndex hash(ObjectId objectId) throws IOException { SimilarityIndex r = new SimilarityIndex(); r.hash(repo.openObject(objectId)); From a20e6f6fec7a696047df86f7d6a85863c4335d83 Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Mon, 12 Jul 2010 10:37:10 -0700 Subject: [PATCH 14/17] Fixed Misleading Javadoc The javadoc for the setRenameLimit method in RenameDetector said that you could only have limits in the range (0,100), implying that 0 and 100 were illegal inputs. The code, however, allowed 0 and 100. I changed the javadoc to say that the range [0,100] was legal. I also documented the IllegalArgumentException that is thrown if the limit is outside that range. Change-Id: I916838f254859f6f0e1516bb55b8e7dc87e57dc2 --- .../src/org/eclipse/jgit/diff/RenameDetector.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 87a67a79c..04e669ffe 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -146,7 +146,9 @@ public int getRenameScore() { * that approximately 60% of the bytes in the files are identical. * * @param score - * new rename score, must be within (0, 100). + * new rename score, must be within [0, 100]. + * @throws IllegalArgumentException + * the score was not within [0, 100]. */ public void setRenameScore(int score) { if (score < 0 || score > 100) From bc08fafb411566522b232e37a2ec971db00676d6 Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Mon, 12 Jul 2010 11:48:56 -0700 Subject: [PATCH 15/17] Added very small optimization to exact rename detection Optimized a small loop in findExactRenames. The loop would go through all the items in a list of DiffEntries even after it already found what it was looking for. I made it break out of the loop as soon as a good match was found. Change-Id: I28741e0c49ce52d8008930a87cd1db7037700a61 --- .../src/org/eclipse/jgit/diff/RenameDetector.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 04e669ffe..5bb63c4dd 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -351,8 +351,10 @@ private void findExactRenames(ProgressMonitor pm) { List list = (List) del; DiffEntry best = null; for (DiffEntry e : list) { - if (best == null && sameType(e.oldMode, dst.newMode)) + if (sameType(e.oldMode, dst.newMode)) { best = e; + break; + } } if (best != null) { if (best.changeType == ChangeType.DELETE) { From f666cc755b35a8f57a7c208c88230f7ab7c1d07b Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Mon, 12 Jul 2010 12:31:18 -0700 Subject: [PATCH 16/17] Added more test cases for RenameDetector I added test cases to cover the majority of the code. It's not 100% coverage yet, but the remaining bits are small. Change-Id: Ib534c8e94b13358b8b22cf54e2ff84132bae6d14 --- .../eclipse/jgit/diff/RenameDetectorTest.java | 117 ++++++++++++++++-- 1 file changed, 108 insertions(+), 9 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java index fe0c565d6..86b40e2f4 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -54,6 +54,7 @@ public class RenameDetectorTest extends RepositoryTestCase { private static final String PATH_A = "src/A"; + private static final String PATH_B = "src/B"; private static final String PATH_H = "src/H"; private static final String PATH_Q = "src/Q"; @@ -109,8 +110,8 @@ public void testExactRename_ManyRenames() throws Exception { DiffEntry a = DiffEntry.add(PATH_A, foo); DiffEntry b = DiffEntry.delete(PATH_Q, foo); - DiffEntry c = DiffEntry.add("README", bar); - DiffEntry d = DiffEntry.delete("REEDME", bar); + DiffEntry c = DiffEntry.add(PATH_H, bar); + DiffEntry d = DiffEntry.delete(PATH_B, bar); rd.add(a); rd.add(b); @@ -119,8 +120,30 @@ public void testExactRename_ManyRenames() throws Exception { List entries = rd.compute(); assertEquals(2, entries.size()); - assertRename(d, c, 100, entries.get(0)); - assertRename(b, a, 100, entries.get(1)); + assertRename(b, a, 100, entries.get(0)); + assertRename(d, c, 100, entries.get(1)); + } + + public void testExactRename_MultipleIdenticalDeletes() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.delete(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_B, foo); + + DiffEntry c = DiffEntry.delete(PATH_H, foo); + DiffEntry d = DiffEntry.add(PATH_Q, foo); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + // Pairs the add with the first delete added + List entries = rd.compute(); + assertEquals(3, entries.size()); + assertEquals(b, entries.get(0)); + assertEquals(c, entries.get(1)); + assertRename(a, d, 100, entries.get(2)); } public void testInexactRename_OnePair() throws Exception { @@ -146,8 +169,8 @@ public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { ObjectId cId = blob("some\nsort\nof\ntext\n"); ObjectId dId = blob("completely\nunrelated\ntext\n"); - DiffEntry c = DiffEntry.add("c", cId); - DiffEntry d = DiffEntry.delete("d", dId); + DiffEntry c = DiffEntry.add(PATH_B, cId); + DiffEntry d = DiffEntry.delete(PATH_H, dId); rd.add(a); rd.add(b); @@ -156,9 +179,9 @@ public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { List entries = rd.compute(); assertEquals(3, entries.size()); - assertSame(c, entries.get(0)); - assertSame(d, entries.get(1)); - assertRename(b, a, 66, entries.get(2)); + assertRename(b, a, 66, entries.get(0)); + assertSame(c, entries.get(1)); + assertSame(d, entries.get(2)); } public void testInexactRename_LastByteDifferent() throws Exception { @@ -176,6 +199,21 @@ public void testInexactRename_LastByteDifferent() throws Exception { assertRename(b, a, 88, entries.get(0)); } + public void testInexactRename_NewlinesOnly() throws Exception { + ObjectId aId = blob("\n\n\n"); + ObjectId bId = blob("\n\n\n\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 74, entries.get(0)); + } + public void testInexactRenames_OnePair2() throws Exception { ObjectId aId = blob("ab\nab\nab\nac\nad\nae\n"); ObjectId bId = blob("ac\nab\nab\nab\naa\na0\na1\n"); @@ -267,6 +305,67 @@ public void testNoRenames_GitlinkAndFile() throws Exception { assertSame(b, entries.get(1)); } + public void testNoRenames_SymlinkAndFileSamePath() throws Exception { + ObjectId aId = blob("src/dest"); + + DiffEntry a = DiffEntry.delete(PATH_A, aId); + DiffEntry b = DiffEntry.add(PATH_A, aId); + a.oldMode = FileMode.SYMLINK; + + rd.add(a); + rd.add(b); + + // Deletes should be first + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + } + + public void testSetRenameScore_IllegalArgs() throws Exception { + try { + rd.setRenameScore(-1); + fail(); + } catch (IllegalArgumentException e) { + // pass + } + + try { + rd.setRenameScore(101); + fail(); + } catch (IllegalArgumentException e) { + // pass + } + } + + public void testRenameLimit() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_B, bId); + + ObjectId cId = blob("a\nb\nc\nd\n"); + ObjectId dId = blob("a\nb\nc\n"); + DiffEntry c = DiffEntry.add(PATH_H, cId); + DiffEntry d = DiffEntry.delete(PATH_Q, dId); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + rd.setRenameLimit(1); + + assertTrue(rd.isOverRenameLimit()); + + List entries = rd.compute(); + assertEquals(4, entries.size()); + assertSame(a, entries.get(0)); + assertSame(b, entries.get(1)); + assertSame(c, entries.get(2)); + assertSame(d, entries.get(3)); + } + private ObjectId blob(String content) throws Exception { return testDb.blob(content).copy(); } From 31311cacfd311f9a78bdf012a9f6ba1907b9964a Mon Sep 17 00:00:00 2001 From: Jeff Schumacher Date: Tue, 13 Jul 2010 12:21:12 -0700 Subject: [PATCH 17/17] Implemented file path based tie breaking to exact rename detection During the exact rename detection phase in RenameDetector, ties were resolved on a first-found basis. I added support for file path based tie breaking during that phase. Basically, there are four situations that have to be handled: One add matching one delete: In this simple case, we pair them as a rename. One add matching many deletes: Find the delete whos path matches the add the closest, and pair them as a rename. Many adds matching one delete: Similar to the above case, we find the add that matches the delete the closest, and pair them as a rename. The other adds are marked as copies of the delete. Many adds matching many deletes: Build a scoring matrix similar to the one used for content- based matching, scoring instead by file path. Some of the utility functions in SimilarityRenameDetector are used in this case, as we use the same encoding scheme. Once the matrix is built, scan it for the best matches, marking them as renames. The rest are marked as copies. I don't particularly like the idea of using utility functions right out of SimilarityRenameDetector, but it works for the moment. A later commit will likely refactor this into a common utility class, as well as bringing exact rename detection out of RenameDetector and into a separate class, much like SimilarityRenameDetector. Change-Id: I1fb08390aebdcbf20d049aecf402a36506e55611 --- .../eclipse/jgit/diff/RenameDetectorTest.java | 58 +++++ .../src/org/eclipse/jgit/diff/DiffEntry.java | 27 +++ .../org/eclipse/jgit/diff/RenameDetector.java | 213 ++++++++++++++---- .../jgit/diff/SimilarityRenameDetector.java | 8 +- 4 files changed, 258 insertions(+), 48 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java index 86b40e2f4..c32c78668 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -146,6 +146,48 @@ public void testExactRename_MultipleIdenticalDeletes() throws Exception { assertRename(a, d, 100, entries.get(2)); } + public void testExactRename_PathBreaksTie() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo); + DiffEntry b = DiffEntry.delete("src/com/foo/b.java", foo); + + DiffEntry c = DiffEntry.add("c.txt", foo); + DiffEntry d = DiffEntry.delete("d.txt", foo); + + // Add out of order to avoid first-match succeeding + rd.add(a); + rd.add(d); + rd.add(b); + rd.add(c); + + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertRename(d, c, 100, entries.get(0)); + assertRename(b, a, 100, entries.get(1)); + } + + public void testExactRename_OneDeleteManyAdds() throws Exception { + ObjectId foo = blob("foo"); + + DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo); + DiffEntry b = DiffEntry.add("src/com/foo/b.java", foo); + DiffEntry c = DiffEntry.add("c.txt", foo); + + DiffEntry d = DiffEntry.delete("d.txt", foo); + + rd.add(a); + rd.add(b); + rd.add(c); + rd.add(d); + + List entries = rd.compute(); + assertEquals(3, entries.size()); + assertRename(d, c, 100, entries.get(0)); + assertCopy(d, a, 100, entries.get(1)); + assertCopy(d, b, 100, entries.get(2)); + } + public void testInexactRename_OnePair() throws Exception { ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); @@ -385,4 +427,20 @@ private static void assertRename(DiffEntry o, DiffEntry n, int score, assertEquals(score, rename.getScore()); } + + private static void assertCopy(DiffEntry o, DiffEntry n, int score, + DiffEntry copy) { + assertEquals(ChangeType.COPY, copy.getChangeType()); + + assertEquals(o.getOldName(), copy.getOldName()); + assertEquals(n.getNewName(), copy.getNewName()); + + assertEquals(o.getOldMode(), copy.getOldMode()); + assertEquals(n.getNewMode(), copy.getNewMode()); + + assertEquals(o.getOldId(), copy.getOldId()); + assertEquals(n.getNewId(), copy.getNewId()); + + assertEquals(score, copy.getScore()); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java index 4d25dbd02..304e4cff3 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java @@ -315,4 +315,31 @@ public AbbreviatedObjectId getOldId() { public AbbreviatedObjectId getNewId() { return newId; } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("DiffEntry["); + buf.append(changeType); + buf.append(" "); + switch (changeType) { + case ADD: + buf.append(newName); + break; + case COPY: + buf.append(oldName + "->" + newName); + break; + case DELETE: + buf.append(oldName); + break; + case MODIFY: + buf.append(oldName); + break; + case RENAME: + buf.append(oldName + "->" + newName); + break; + } + buf.append("]"); + return buf.toString(); + } } \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 5bb63c4dd..bc2394053 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -45,6 +45,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -312,70 +313,131 @@ private void findExactRenames(ProgressMonitor pm) { return; pm.beginTask(JGitText.get().renamesFindingExact, // - added.size() + deleted.size()); + added.size() + added.size() + deleted.size() + + added.size() * deleted.size()); - HashMap map = new HashMap(); - for (DiffEntry del : deleted) { - Object old = map.put(del.oldId, del); - if (old instanceof DiffEntry) { - ArrayList list = new ArrayList(2); - list.add((DiffEntry) old); - list.add(del); - map.put(del.oldId, list); + HashMap deletedMap = populateMap(deleted, pm); + HashMap addedMap = populateMap(added, pm); - } else if (old != null) { - // Must be a list of DiffEntries - ((List) old).add(del); - map.put(del.oldId, old); - } - pm.update(1); + ArrayList uniqueAdds = new ArrayList(added.size()); + ArrayList> nonUniqueAdds = new ArrayList>(); + + for (Object o : addedMap.values()) { + if (o instanceof DiffEntry) + uniqueAdds.add((DiffEntry) o); + else + nonUniqueAdds.add((List) o); } ArrayList left = new ArrayList(added.size()); - for (DiffEntry dst : added) { - Object del = map.get(dst.newId); + + for (DiffEntry a : uniqueAdds) { + Object del = deletedMap.get(a.newId); if (del instanceof DiffEntry) { + // We have one add to one delete: pair them if they are the same + // type DiffEntry e = (DiffEntry) del; - if (sameType(e.oldMode, dst.newMode)) { - if (e.changeType == ChangeType.DELETE) { - e.changeType = ChangeType.RENAME; - entries.add(exactRename(e, dst)); - } else { - entries.add(exactCopy(e, dst)); - } + if (sameType(e.oldMode, a.newMode)) { + e.changeType = ChangeType.RENAME; + entries.add(exactRename(e, a)); } else { - left.add(dst); + left.add(a); } - } else if (del != null) { + // We have one add to many deletes: find the delete with the + // same type and closest name to the add, then pair them List list = (List) del; - DiffEntry best = null; - for (DiffEntry e : list) { - if (sameType(e.oldMode, dst.newMode)) { - best = e; - break; - } - } + DiffEntry best = bestPathMatch(a, list); if (best != null) { - if (best.changeType == ChangeType.DELETE) { - best.changeType = ChangeType.RENAME; - entries.add(exactRename(best, dst)); - } else { - entries.add(exactCopy(best, dst)); - } + best.changeType = ChangeType.RENAME; + entries.add(exactRename(best, a)); } else { - left.add(dst); + left.add(a); } - } else { - left.add(dst); + left.add(a); } pm.update(1); } + + for (List adds : nonUniqueAdds) { + Object o = deletedMap.get(adds.get(0).newId); + if (o instanceof DiffEntry) { + // We have many adds to one delete: find the add with the same + // type and closest name to the delete, then pair them. Mark the + // rest as copies of the delete. + DiffEntry d = (DiffEntry) o; + DiffEntry best = bestPathMatch(d, adds); + if (best != null) { + d.changeType = ChangeType.RENAME; + entries.add(exactRename(d, best)); + for (DiffEntry a : adds) { + if (a != best) { + if (sameType(d.oldMode, a.newMode)) { + entries.add(exactCopy(d, a)); + } else { + left.add(a); + } + } + } + } else { + left.addAll(adds); + } + } else { + // We have many adds to many deletes: score all the adds against + // all the deletes by path name, take the best matches, pair + // them as renames, then call the rest copies + List dels = (List) o; + long[] matrix = new long[dels.size() * adds.size()]; + int mNext = 0; + for (int addIdx = 0; addIdx < adds.size(); addIdx++) { + String addedName = adds.get(addIdx).newName; + + for (int delIdx = 0; delIdx < dels.size(); delIdx++) { + String deletedName = dels.get(delIdx).oldName; + + int score = SimilarityRenameDetector.nameScore(addedName, deletedName); + matrix[mNext] = SimilarityRenameDetector.encode(score, addIdx, delIdx); + mNext++; + } + } + + Arrays.sort(matrix); + + for (--mNext; mNext >= 0; mNext--) { + long ent = matrix[mNext]; + int delIdx = SimilarityRenameDetector.srcFile(ent); + int addIdx = SimilarityRenameDetector.dstFile(ent); + DiffEntry d = dels.get(delIdx); + DiffEntry a = adds.get(addIdx); + + if (a == null) { + pm.update(1); + continue; // was already matched earlier + } + + ChangeType type; + if (d.changeType == ChangeType.DELETE) { + // First use of this source file. Tag it as a rename so we + // later know it is already been used as a rename, other + // matches (if any) will claim themselves as copies instead. + // + d.changeType = ChangeType.RENAME; + type = ChangeType.RENAME; + } else { + type = ChangeType.COPY; + } + + entries.add(DiffEntry.pair(type, d, a, 100)); + adds.set(addIdx, null); // Claim the destination was matched. + pm.update(1); + } + } + } added = left; - deleted = new ArrayList(map.size()); - for (Object o : map.values()) { + deleted = new ArrayList(deletedMap.size()); + for (Object o : deletedMap.values()) { if (o instanceof DiffEntry) { DiffEntry e = (DiffEntry) o; if (e.changeType == ChangeType.DELETE) @@ -391,6 +453,69 @@ private void findExactRenames(ProgressMonitor pm) { pm.endTask(); } + /** + * Find the best match by file path for a given DiffEntry from a list of + * DiffEntrys. The returned DiffEntry will be of the same type as . If + * no DiffEntry can be found that has the same type, this method will return + * null. + * + * @param src + * the DiffEntry to try to find a match for + * @param list + * a list of DiffEntrys to search through + * @return the DiffEntry from who's file path best matches + */ + private static DiffEntry bestPathMatch(DiffEntry src, List list) { + DiffEntry best = null; + int score = -1; + + for (DiffEntry d : list) { + if (sameType(mode(d), mode(src))) { + int tmp = SimilarityRenameDetector + .nameScore(path(d), path(src)); + if (tmp > score) { + best = d; + score = tmp; + } + } + } + + return best; + } + + @SuppressWarnings("unchecked") + private HashMap populateMap( + List diffEntries, ProgressMonitor pm) { + HashMap map = new HashMap(); + for (DiffEntry de : diffEntries) { + Object old = map.put(id(de), de); + if (old instanceof DiffEntry) { + ArrayList list = new ArrayList(2); + list.add((DiffEntry) old); + list.add(de); + map.put(id(de), list); + } else if (old != null) { + // Must be a list of DiffEntries + ((List) old).add(de); + map.put(id(de), old); + } + pm.update(1); + } + return map; + } + + private static String path(DiffEntry de) { + return de.changeType == ChangeType.DELETE ? de.oldName : de.newName; + } + + private static FileMode mode(DiffEntry de) { + return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode; + } + + private static AbbreviatedObjectId id(DiffEntry de) { + return de.changeType == ChangeType.DELETE ? de.oldId : de.newId; + } + static boolean sameType(FileMode a, FileMode b) { // Files have to be of the same type in order to rename them. // We would never want to rename a file to a gitlink, or a diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java index 6590f746f..e2115f0ac 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java @@ -287,7 +287,7 @@ private int buildMatrix(ProgressMonitor pm) throws IOException { return mNext; } - private int nameScore(String a, String b) { + static int nameScore(String a, String b) { int aDirLen = a.lastIndexOf("/") + 1; int bDirLen = b.lastIndexOf("/") + 1; @@ -349,15 +349,15 @@ private static int score(long value) { return (int) (value >>> SCORE_SHIFT); } - private static int srcFile(long value) { + static int srcFile(long value) { return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK); } - private static int dstFile(long value) { + static int dstFile(long value) { return decodeFile(((int) value) & INDEX_MASK); } - private static long encode(int score, int srcIdx, int dstIdx) { + static long encode(int score, int srcIdx, int dstIdx) { return (((long) score) << SCORE_SHIFT) // | (encodeFile(srcIdx) << BITS_PER_INDEX) // | encodeFile(dstIdx);