Merge branch 'js/rename'

* js/rename:
  Implemented file path based tie breaking to exact rename detection
  Added more test cases for RenameDetector
  Added very small optimization to exact rename detection
  Fixed Misleading Javadoc
  Added file path similarity to scoring metric in rename detection
  Fixed potential div by zero bug
  Added file size based rename detection optimization
  Create FileHeader from DiffEntry
  log: Implement --follow
  Cache the diff configuration section
  log: Add whitespace ignore options
  Format submodule links during differences
  Redo DiffFormatter API to be easier to use
  log, diff: Add rename detection support
  Implement similarity based rename detection
  Added a preliminary version of rename detection
  Refactored code out of FileHeader to facilitate rename detection
This commit is contained in:
Shawn O. Pearce 2010-07-16 10:22:15 -07:00
commit 6e155d5f41
30 changed files with 3339 additions and 305 deletions

View File

@ -66,7 +66,9 @@ metaVar_directory=DIRECTORY
metaVar_file=FILE
metaVar_gitDir=GIT_DIR
metaVar_hostName=HOSTNAME
metaVar_linesOfContext=lines
metaVar_message=message
metaVar_n=n
metaVar_name=name
metaVar_object=object
metaVar_op=OP
@ -139,6 +141,7 @@ usage_cloneRepositoryIntoNewDir=Clone a repository into a new directory
usage_configureTheServiceInDaemonServicename=configure the service in daemon.servicename
usage_deleteBranchEvenIfNotMerged=delete branch (even if not merged)
usage_deleteFullyMergedBranch=delete fully merged branch
usage_detectRenames=detect renamed files
usage_directoriesToExport=directories to export
usage_disableTheServiceInAllRepositories=disable the service in all repositories
usage_displayAListOfAllRegisteredJgitCommands=Display a list of all registered jgit commands
@ -160,6 +163,7 @@ usage_listBothRemoteTrackingAndLocalBranches=list both remote-tracking and local
usage_listCreateOrDeleteBranches=List, create, or delete branches
usage_logAllPretty=format:%H %ct %P' output=log --all '--pretty=format:%H %ct %P' output
usage_moveRenameABranch=move/rename a branch
usage_nameStatus=show only name and status of files
usage_outputFile=Output file
usage_path=path
usage_performFsckStyleChecksOnReceive=perform fsck style checks on receive
@ -168,8 +172,10 @@ usage_produceAnEclipseIPLog=Produce an Eclipse IP log
usage_pruneStaleTrackingRefs=prune stale tracking refs
usage_recurseIntoSubtrees=recurse into subtrees
usage_recordChangesToRepository=Record changes to the repository
usage_renameLimit=limit size of rename matrix
usage_setTheGitRepositoryToOperateOn=set the git repository to operate on
usage_showRefNamesMatchingCommits=Show ref names matching commits
usage_showPatch=display patch
usage_symbolicVersionForTheProject=Symbolic version for the project
usage_synchronizeIPZillaData=Synchronize IPZilla data
usage_tagMessage=tag message

View File

@ -45,31 +45,34 @@
package org.eclipse.jgit.pgm;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.Option;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.diff.MyersDiff;
import org.eclipse.jgit.diff.RawText;
import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace;
import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace;
import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace;
import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.diff.RenameDetector;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.TextProgressMonitor;
import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler;
import org.eclipse.jgit.treewalk.AbstractTreeIterator;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.Option;
@Command(common = true, usage = "usage_ShowDiffs")
class Diff extends TextBuiltin {
private final DiffFormatter diffFmt = new DiffFormatter( //
new BufferedOutputStream(System.out));
@Argument(index = 0, metaVar = "metaVar_treeish", required = true)
void tree_0(final AbstractTreeIterator c) {
trees.add(c);
@ -78,25 +81,101 @@ void tree_0(final AbstractTreeIterator c) {
@Argument(index = 1, metaVar = "metaVar_treeish", required = true)
private final List<AbstractTreeIterator> trees = new ArrayList<AbstractTreeIterator>();
@Option(name = "--", metaVar = "metaVar_port", multiValued = true, handler = PathTreeFilterHandler.class)
@Option(name = "--", metaVar = "metaVar_paths", multiValued = true, handler = PathTreeFilterHandler.class)
private TreeFilter pathFilter = TreeFilter.ALL;
// BEGIN -- Options shared with Log
@Option(name = "-p", usage = "usage_showPatch")
boolean showPatch;
@Option(name = "-M", usage = "usage_detectRenames")
private boolean detectRenames;
@Option(name = "-l", usage = "usage_renameLimit")
private Integer renameLimit;
@Option(name = "--name-status", usage = "usage_nameStatus")
private boolean showNameAndStatusOnly;
@Option(name = "--ignore-space-at-eol")
private boolean ignoreWsTrailing;
void ignoreSpaceAtEol(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreTrailingWhitespace.FACTORY);
}
@Option(name = "--ignore-leading-space")
private boolean ignoreWsLeading;
void ignoreLeadingSpace(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreLeadingWhitespace.FACTORY);
}
@Option(name = "-b", aliases = { "--ignore-space-change" })
private boolean ignoreWsChange;
void ignoreSpaceChange(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreWhitespaceChange.FACTORY);
}
@Option(name = "-w", aliases = { "--ignore-all-space" })
private boolean ignoreWsAll;
void ignoreAllSpace(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreAllWhitespace.FACTORY);
}
private DiffFormatter fmt = new DiffFormatter();
@Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext")
void unified(int lines) {
diffFmt.setContext(lines);
}
@Option(name = "--abbrev", metaVar = "n")
void abbrev(int lines) {
diffFmt.setAbbreviationLength(lines);
}
@Option(name = "--full-index")
void abbrev(@SuppressWarnings("unused") boolean on) {
diffFmt.setAbbreviationLength(Constants.OBJECT_ID_STRING_LENGTH);
}
// END -- Options shared with Log
@Override
protected void run() throws Exception {
List<DiffEntry> files = scan();
if (showNameAndStatusOnly) {
nameStatus(out, files);
out.flush();
} else {
diffFmt.setRepository(db);
diffFmt.format(files);
diffFmt.flush();
}
}
static void nameStatus(PrintWriter out, List<DiffEntry> files) {
for (DiffEntry ent : files) {
switch (ent.getChangeType()) {
case ADD:
out.println("A\t" + ent.getNewName());
break;
case DELETE:
out.println("D\t" + ent.getOldName());
break;
case MODIFY:
out.println("M\t" + ent.getNewName());
break;
case COPY:
out.format("C%1$03d\t%2$s\t%3$s", ent.getScore(), //
ent.getOldName(), ent.getNewName());
out.println();
break;
case RENAME:
out.format("R%1$03d\t%2$s\t%3$s", ent.getScore(), //
ent.getOldName(), ent.getNewName());
out.println();
break;
}
}
}
private List<DiffEntry> scan() throws IOException {
final TreeWalk walk = new TreeWalk(db);
walk.reset();
walk.setRecursive(true);
@ -104,65 +183,14 @@ protected void run() throws Exception {
walk.addTree(i);
walk.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter));
while (walk.next())
outputDiff(System.out, walk.getPathString(),
walk.getObjectId(0), walk.getFileMode(0),
walk.getObjectId(1), walk.getFileMode(1));
}
protected void outputDiff(PrintStream out, String path,
ObjectId id1, FileMode mode1, ObjectId id2, FileMode mode2) throws IOException {
String name1 = "a/" + path;
String name2 = "b/" + path;
out.println("diff --git " + name1 + " " + name2);
boolean isNew=false;
boolean isDelete=false;
if (id1.equals(ObjectId.zeroId())) {
out.println("new file mode " + mode2);
isNew=true;
} else if (id2.equals(ObjectId.zeroId())) {
out.println("deleted file mode " + mode1);
isDelete=true;
} else if (!mode1.equals(mode2)) {
out.println("old mode " + mode1);
out.println("new mode " + mode2);
List<DiffEntry> files = DiffEntry.scan(walk);
if (detectRenames) {
RenameDetector rd = new RenameDetector(db);
if (renameLimit != null)
rd.setRenameLimit(renameLimit.intValue());
rd.addAll(files);
files = rd.compute(new TextProgressMonitor());
}
out.println("index " + id1.abbreviate(db, 7).name()
+ ".." + id2.abbreviate(db, 7).name()
+ (mode1.equals(mode2) ? " " + mode1 : ""));
out.println("--- " + (isNew ? "/dev/null" : name1));
out.println("+++ " + (isDelete ? "/dev/null" : name2));
byte[] aRaw = getRawBytes(id1);
byte[] bRaw = getRawBytes(id2);
if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
out.println("Binary files differ");
return;
}
RawText a = getRawText(aRaw);
RawText b = getRawText(bRaw);
MyersDiff diff = new MyersDiff(a, b);
fmt.formatEdits(out, a, b, diff.getEdits());
}
private byte[] getRawBytes(ObjectId id) throws IOException {
if (id.equals(ObjectId.zeroId()))
return new byte[] {};
return db.openBlob(id).getCachedBytes();
}
private RawText getRawText(byte[] raw) {
if (ignoreWsAll)
return new RawTextIgnoreAllWhitespace(raw);
else if (ignoreWsTrailing)
return new RawTextIgnoreTrailingWhitespace(raw);
else if (ignoreWsChange)
return new RawTextIgnoreWhitespaceChange(raw);
else if (ignoreWsLeading)
return new RawTextIgnoreLeadingWhitespace(raw);
else
return new RawText(raw);
return files;
}
}

View File

@ -45,22 +45,39 @@
package org.eclipse.jgit.pgm;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.text.DateFormat;
import java.text.MessageFormat;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import org.kohsuke.args4j.Option;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.diff.RawTextIgnoreAllWhitespace;
import org.eclipse.jgit.diff.RawTextIgnoreLeadingWhitespace;
import org.eclipse.jgit.diff.RawTextIgnoreTrailingWhitespace;
import org.eclipse.jgit.diff.RawTextIgnoreWhitespaceChange;
import org.eclipse.jgit.diff.RenameDetector;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.revwalk.FollowFilter;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.kohsuke.args4j.Option;
@Command(common = true, usage = "usage_viewCommitHistory")
class Log extends RevWalkTextBuiltin {
@ -68,11 +85,64 @@ class Log extends RevWalkTextBuiltin {
private final DateFormat fmt;
private final DiffFormatter diffFmt = new DiffFormatter( //
new BufferedOutputStream(System.out));
private Map<AnyObjectId, Set<Ref>> allRefsByPeeledObjectId;
@Option(name="--decorate", usage="usage_showRefNamesMatchingCommits")
private boolean decorate;
// BEGIN -- Options shared with Diff
@Option(name = "-p", usage = "usage_showPatch")
boolean showPatch;
@Option(name = "-M", usage = "usage_detectRenames")
private boolean detectRenames;
@Option(name = "-l", usage = "usage_renameLimit")
private Integer renameLimit;
@Option(name = "--name-status", usage = "usage_nameStatus")
private boolean showNameAndStatusOnly;
@Option(name = "--ignore-space-at-eol")
void ignoreSpaceAtEol(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreTrailingWhitespace.FACTORY);
}
@Option(name = "--ignore-leading-space")
void ignoreLeadingSpace(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreLeadingWhitespace.FACTORY);
}
@Option(name = "-b", aliases = { "--ignore-space-change" })
void ignoreSpaceChange(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreWhitespaceChange.FACTORY);
}
@Option(name = "-w", aliases = { "--ignore-all-space" })
void ignoreAllSpace(@SuppressWarnings("unused") boolean on) {
diffFmt.setRawTextFactory(RawTextIgnoreAllWhitespace.FACTORY);
}
@Option(name = "-U", aliases = { "--unified" }, metaVar = "metaVar_linesOfContext")
void unified(int lines) {
diffFmt.setContext(lines);
}
@Option(name = "--abbrev", metaVar = "metaVar_n")
void abbrev(int lines) {
diffFmt.setAbbreviationLength(lines);
}
@Option(name = "--full-index")
void abbrev(@SuppressWarnings("unused") boolean on) {
diffFmt.setAbbreviationLength(Constants.OBJECT_ID_STRING_LENGTH);
}
// END -- Options shared with Diff
Log() {
fmt = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy ZZZZZ", Locale.US);
}
@ -120,6 +190,77 @@ protected void show(final RevCommit c) throws Exception {
}
out.println();
if (c.getParentCount() == 1 && (showNameAndStatusOnly || showPatch))
showDiff(c);
out.flush();
}
private void showDiff(RevCommit c) throws IOException {
final TreeWalk tw = new TreeWalk(db);
tw.setRecursive(true);
tw.reset();
tw.addTree(c.getParent(0).getTree());
tw.addTree(c.getTree());
tw.setFilter(AndTreeFilter.create(pathFilter, TreeFilter.ANY_DIFF));
List<DiffEntry> files = DiffEntry.scan(tw);
if (pathFilter instanceof FollowFilter && isAdd(files)) {
// The file we are following was added here, find where it
// came from so we can properly show the rename or copy,
// then continue digging backwards.
//
tw.reset();
tw.addTree(c.getParent(0).getTree());
tw.addTree(c.getTree());
tw.setFilter(TreeFilter.ANY_DIFF);
files = updateFollowFilter(detectRenames(DiffEntry.scan(tw)));
} else if (detectRenames)
files = detectRenames(files);
if (showNameAndStatusOnly) {
Diff.nameStatus(out, files);
} else {
diffFmt.setRepository(db);
diffFmt.format(files);
diffFmt.flush();
}
out.println();
}
private List<DiffEntry> detectRenames(List<DiffEntry> files)
throws IOException {
RenameDetector rd = new RenameDetector(db);
if (renameLimit != null)
rd.setRenameLimit(renameLimit.intValue());
rd.addAll(files);
return rd.compute();
}
private boolean isAdd(List<DiffEntry> files) {
String oldPath = ((FollowFilter) pathFilter).getPath();
for (DiffEntry ent : files) {
if (ent.getChangeType() == ChangeType.ADD
&& ent.getNewName().equals(oldPath))
return true;
}
return false;
}
private List<DiffEntry> updateFollowFilter(List<DiffEntry> files) {
String oldPath = ((FollowFilter) pathFilter).getPath();
for (DiffEntry ent : files) {
if (isRename(ent) && ent.getNewName().equals(oldPath)) {
pathFilter = FollowFilter.create(ent.getOldName());
return Collections.singletonList(ent);
}
}
return Collections.emptyList();
}
private static boolean isRename(DiffEntry ent) {
return ent.getChangeType() == ChangeType.RENAME
|| ent.getChangeType() == ChangeType.COPY;
}
}

View File

@ -53,6 +53,7 @@
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler;
import org.eclipse.jgit.revwalk.FollowFilter;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevFlag;
@ -110,11 +111,16 @@ void enableBoundary(final boolean on) {
enableRevSort(RevSort.BOUNDARY, on);
}
@Option(name = "--follow", metaVar = "metaVar_path")
void follow(final String path) {
pathFilter = FollowFilter.create(path);
}
@Argument(index = 0, metaVar = "metaVar_commitish")
private final List<RevCommit> commits = new ArrayList<RevCommit>();
@Option(name = "--", metaVar = "metaVar_path", multiValued = true, handler = PathTreeFilterHandler.class)
private TreeFilter pathFilter = TreeFilter.ALL;
protected TreeFilter pathFilter = TreeFilter.ALL;
private final List<RevFilter> revLimiter = new ArrayList<RevFilter>();
@ -139,7 +145,9 @@ protected void run() throws Exception {
for (final RevSort s : sorting)
walk.sort(s, true);
if (pathFilter != TreeFilter.ALL)
if (pathFilter instanceof FollowFilter)
walk.setTreeFilter(pathFilter);
else if (pathFilter != TreeFilter.ALL)
walk.setTreeFilter(AndTreeFilter.create(pathFilter,
TreeFilter.ANY_DIFF));

View File

@ -68,7 +68,7 @@ public class DiffFormatterReflowTest extends TestCase {
protected void setUp() throws Exception {
super.setUp();
out = new ByteArrayOutputStream();
fmt = new DiffFormatter();
fmt = new DiffFormatter(out);
}
public void testNegativeContextFails() throws IOException {
@ -143,7 +143,7 @@ private void assertFormatted() throws IOException {
}
private void assertFormatted(final String name) throws IOException {
fmt.format(out, file, a, b);
fmt.format(file, a, b);
final String exp = RawParseUtils.decode(readFile(name));
assertEquals(exp, RawParseUtils.decode(out.toByteArray()));
}

View File

@ -0,0 +1,186 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.RepositoryTestCase;
import org.eclipse.jgit.patch.FileHeader;
import org.eclipse.jgit.patch.HunkHeader;
import org.eclipse.jgit.util.RawParseUtils;
import org.eclipse.jgit.util.io.DisabledOutputStream;
public class DiffFormatterTest extends RepositoryTestCase {
private static final String DIFF = "diff --git ";
private static final String REGULAR_FILE = "100644";
private static final String GITLINK = "160000";
private static final String PATH_A = "src/a";
private static final String PATH_B = "src/b";
private DiffFormatter df;
private TestRepository testDb;
@Override
public void setUp() throws Exception {
super.setUp();
testDb = new TestRepository(db);
df = new DiffFormatter(DisabledOutputStream.INSTANCE);
df.setRepository(db);
}
public void testCreateFileHeader_Modify() throws Exception {
ObjectId adId = blob("a\nd\n");
ObjectId abcdId = blob("a\nb\nc\nd\n");
String diffHeader = makeDiffHeader(PATH_A, PATH_A, adId, abcdId);
DiffEntry ad = DiffEntry.delete(PATH_A, adId);
DiffEntry abcd = DiffEntry.add(PATH_A, abcdId);
DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0);
FileHeader fh = df.createFileHeader(mod);
assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer()));
assertEquals(0, fh.getStartOffset());
assertEquals(fh.getBuffer().length, fh.getEndOffset());
assertEquals(FileHeader.PatchType.UNIFIED, fh.getPatchType());
assertEquals(1, fh.getHunks().size());
HunkHeader hh = fh.getHunks().get(0);
assertEquals(1, hh.toEditList().size());
EditList el = hh.toEditList();
assertEquals(1, el.size());
Edit e = el.get(0);
assertEquals(1, e.getBeginA());
assertEquals(1, e.getEndA());
assertEquals(1, e.getBeginB());
assertEquals(3, e.getEndB());
assertEquals(Edit.Type.INSERT, e.getType());
}
public void testCreateFileHeader_Binary() throws Exception {
ObjectId adId = blob("a\nd\n");
ObjectId binId = blob("a\nb\nc\n\0\0\0\0d\n");
String diffHeader = makeDiffHeader(PATH_A, PATH_B, adId, binId)
+ "Binary files differ\n";
DiffEntry ad = DiffEntry.delete(PATH_A, adId);
DiffEntry abcd = DiffEntry.add(PATH_B, binId);
DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0);
FileHeader fh = df.createFileHeader(mod);
assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer()));
assertEquals(FileHeader.PatchType.BINARY, fh.getPatchType());
assertEquals(1, fh.getHunks().size());
HunkHeader hh = fh.getHunks().get(0);
assertEquals(0, hh.toEditList().size());
}
public void testCreateFileHeader_GitLink() throws Exception {
ObjectId aId = blob("a\n");
ObjectId bId = blob("b\n");
String diffHeader = makeDiffHeaderModeChange(PATH_A, PATH_A, aId, bId,
GITLINK, REGULAR_FILE)
+ "-Subproject commit " + aId.name() + "\n";
DiffEntry ad = DiffEntry.delete(PATH_A, aId);
ad.oldMode = FileMode.GITLINK;
DiffEntry abcd = DiffEntry.add(PATH_A, bId);
DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0);
FileHeader fh = df.createFileHeader(mod);
assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer()));
assertEquals(1, fh.getHunks().size());
HunkHeader hh = fh.getHunks().get(0);
assertEquals(0, hh.toEditList().size());
}
private String makeDiffHeader(String pathA, String pathB, ObjectId aId,
ObjectId bId) {
String a = aId.abbreviate(db).name();
String b = bId.abbreviate(db).name();
return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + //
"index " + a + ".." + b + " " + REGULAR_FILE + "\n" + //
"--- a/" + pathA + "\n" + //
"+++ b/" + pathB + "\n";
}
private String makeDiffHeaderModeChange(String pathA, String pathB,
ObjectId aId, ObjectId bId, String modeA, String modeB) {
String a = aId.abbreviate(db).name();
String b = bId.abbreviate(db).name();
return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + //
"old mode " + modeA + "\n" + //
"new mode " + modeB + "\n" + //
"index " + a + ".." + b + "\n" + //
"--- a/" + pathA + "\n" + //
"+++ b/" + pathB + "\n";
}
private ObjectId blob(String content) throws Exception {
return testDb.blob(content).copy();
}
}

View File

@ -0,0 +1,446 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.util.List;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.RepositoryTestCase;
public class RenameDetectorTest extends RepositoryTestCase {
private static final String PATH_A = "src/A";
private static final String PATH_B = "src/B";
private static final String PATH_H = "src/H";
private static final String PATH_Q = "src/Q";
private RenameDetector rd;
private TestRepository testDb;
@Override
public void setUp() throws Exception {
super.setUp();
testDb = new TestRepository(db);
rd = new RenameDetector(db);
}
public void testExactRename_OneRename() throws Exception {
ObjectId foo = blob("foo");
DiffEntry a = DiffEntry.add(PATH_A, foo);
DiffEntry b = DiffEntry.delete(PATH_Q, foo);
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(1, entries.size());
assertRename(b, a, 100, entries.get(0));
}
public void testExactRename_OneRenameOneModify() throws Exception {
ObjectId foo = blob("foo");
ObjectId bar = blob("bar");
DiffEntry a = DiffEntry.add(PATH_A, foo);
DiffEntry b = DiffEntry.delete(PATH_Q, foo);
DiffEntry c = DiffEntry.modify(PATH_H);
c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar);
rd.add(a);
rd.add(b);
rd.add(c);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertRename(b, a, 100, entries.get(0));
assertSame(c, entries.get(1));
}
public void testExactRename_ManyRenames() throws Exception {
ObjectId foo = blob("foo");
ObjectId bar = blob("bar");
DiffEntry a = DiffEntry.add(PATH_A, foo);
DiffEntry b = DiffEntry.delete(PATH_Q, foo);
DiffEntry c = DiffEntry.add(PATH_H, bar);
DiffEntry d = DiffEntry.delete(PATH_B, bar);
rd.add(a);
rd.add(b);
rd.add(c);
rd.add(d);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertRename(b, a, 100, entries.get(0));
assertRename(d, c, 100, entries.get(1));
}
public void testExactRename_MultipleIdenticalDeletes() throws Exception {
ObjectId foo = blob("foo");
DiffEntry a = DiffEntry.delete(PATH_A, foo);
DiffEntry b = DiffEntry.delete(PATH_B, foo);
DiffEntry c = DiffEntry.delete(PATH_H, foo);
DiffEntry d = DiffEntry.add(PATH_Q, foo);
rd.add(a);
rd.add(b);
rd.add(c);
rd.add(d);
// Pairs the add with the first delete added
List<DiffEntry> entries = rd.compute();
assertEquals(3, entries.size());
assertEquals(b, entries.get(0));
assertEquals(c, entries.get(1));
assertRename(a, d, 100, entries.get(2));
}
public void testExactRename_PathBreaksTie() throws Exception {
ObjectId foo = blob("foo");
DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo);
DiffEntry b = DiffEntry.delete("src/com/foo/b.java", foo);
DiffEntry c = DiffEntry.add("c.txt", foo);
DiffEntry d = DiffEntry.delete("d.txt", foo);
// Add out of order to avoid first-match succeeding
rd.add(a);
rd.add(d);
rd.add(b);
rd.add(c);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertRename(d, c, 100, entries.get(0));
assertRename(b, a, 100, entries.get(1));
}
public void testExactRename_OneDeleteManyAdds() throws Exception {
ObjectId foo = blob("foo");
DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo);
DiffEntry b = DiffEntry.add("src/com/foo/b.java", foo);
DiffEntry c = DiffEntry.add("c.txt", foo);
DiffEntry d = DiffEntry.delete("d.txt", foo);
rd.add(a);
rd.add(b);
rd.add(c);
rd.add(d);
List<DiffEntry> entries = rd.compute();
assertEquals(3, entries.size());
assertRename(d, c, 100, entries.get(0));
assertCopy(d, a, 100, entries.get(1));
assertCopy(d, b, 100, entries.get(2));
}
public void testInexactRename_OnePair() throws Exception {
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(1, entries.size());
assertRename(b, a, 66, entries.get(0));
}
public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception {
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
ObjectId cId = blob("some\nsort\nof\ntext\n");
ObjectId dId = blob("completely\nunrelated\ntext\n");
DiffEntry c = DiffEntry.add(PATH_B, cId);
DiffEntry d = DiffEntry.delete(PATH_H, dId);
rd.add(a);
rd.add(b);
rd.add(c);
rd.add(d);
List<DiffEntry> entries = rd.compute();
assertEquals(3, entries.size());
assertRename(b, a, 66, entries.get(0));
assertSame(c, entries.get(1));
assertSame(d, entries.get(2));
}
public void testInexactRename_LastByteDifferent() throws Exception {
ObjectId aId = blob("foo\nbar\na");
ObjectId bId = blob("foo\nbar\nb");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(1, entries.size());
assertRename(b, a, 88, entries.get(0));
}
public void testInexactRename_NewlinesOnly() throws Exception {
ObjectId aId = blob("\n\n\n");
ObjectId bId = blob("\n\n\n\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(1, entries.size());
assertRename(b, a, 74, entries.get(0));
}
public void testInexactRenames_OnePair2() throws Exception {
ObjectId aId = blob("ab\nab\nab\nac\nad\nae\n");
ObjectId bId = blob("ac\nab\nab\nab\naa\na0\na1\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
rd.add(a);
rd.add(b);
rd.setRenameScore(50);
List<DiffEntry> entries = rd.compute();
assertEquals(1, entries.size());
assertRename(b, a, 57, entries.get(0));
}
public void testNoRenames_SingleByteFiles() throws Exception {
ObjectId aId = blob("a");
ObjectId bId = blob("b");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertSame(a, entries.get(0));
assertSame(b, entries.get(1));
}
public void testNoRenames_EmptyFile1() throws Exception {
ObjectId aId = blob("");
DiffEntry a = DiffEntry.add(PATH_A, aId);
rd.add(a);
List<DiffEntry> entries = rd.compute();
assertEquals(1, entries.size());
assertSame(a, entries.get(0));
}
public void testNoRenames_EmptyFile2() throws Exception {
ObjectId aId = blob("");
ObjectId bId = blob("blah");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertSame(a, entries.get(0));
assertSame(b, entries.get(1));
}
public void testNoRenames_SymlinkAndFile() throws Exception {
ObjectId aId = blob("src/dest");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, aId);
b.oldMode = FileMode.SYMLINK;
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertSame(a, entries.get(0));
assertSame(b, entries.get(1));
}
public void testNoRenames_GitlinkAndFile() throws Exception {
ObjectId aId = blob("src/dest");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_Q, aId);
b.oldMode = FileMode.GITLINK;
rd.add(a);
rd.add(b);
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertSame(a, entries.get(0));
assertSame(b, entries.get(1));
}
public void testNoRenames_SymlinkAndFileSamePath() throws Exception {
ObjectId aId = blob("src/dest");
DiffEntry a = DiffEntry.delete(PATH_A, aId);
DiffEntry b = DiffEntry.add(PATH_A, aId);
a.oldMode = FileMode.SYMLINK;
rd.add(a);
rd.add(b);
// Deletes should be first
List<DiffEntry> entries = rd.compute();
assertEquals(2, entries.size());
assertSame(a, entries.get(0));
assertSame(b, entries.get(1));
}
public void testSetRenameScore_IllegalArgs() throws Exception {
try {
rd.setRenameScore(-1);
fail();
} catch (IllegalArgumentException e) {
// pass
}
try {
rd.setRenameScore(101);
fail();
} catch (IllegalArgumentException e) {
// pass
}
}
public void testRenameLimit() throws Exception {
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
DiffEntry a = DiffEntry.add(PATH_A, aId);
DiffEntry b = DiffEntry.delete(PATH_B, bId);
ObjectId cId = blob("a\nb\nc\nd\n");
ObjectId dId = blob("a\nb\nc\n");
DiffEntry c = DiffEntry.add(PATH_H, cId);
DiffEntry d = DiffEntry.delete(PATH_Q, dId);
rd.add(a);
rd.add(b);
rd.add(c);
rd.add(d);
rd.setRenameLimit(1);
assertTrue(rd.isOverRenameLimit());
List<DiffEntry> entries = rd.compute();
assertEquals(4, entries.size());
assertSame(a, entries.get(0));
assertSame(b, entries.get(1));
assertSame(c, entries.get(2));
assertSame(d, entries.get(3));
}
private ObjectId blob(String content) throws Exception {
return testDb.blob(content).copy();
}
private static void assertRename(DiffEntry o, DiffEntry n, int score,
DiffEntry rename) {
assertEquals(ChangeType.RENAME, rename.getChangeType());
assertEquals(o.getOldName(), rename.getOldName());
assertEquals(n.getNewName(), rename.getNewName());
assertEquals(o.getOldMode(), rename.getOldMode());
assertEquals(n.getNewMode(), rename.getNewMode());
assertEquals(o.getOldId(), rename.getOldId());
assertEquals(n.getNewId(), rename.getNewId());
assertEquals(score, rename.getScore());
}
private static void assertCopy(DiffEntry o, DiffEntry n, int score,
DiffEntry copy) {
assertEquals(ChangeType.COPY, copy.getChangeType());
assertEquals(o.getOldName(), copy.getOldName());
assertEquals(n.getNewName(), copy.getNewName());
assertEquals(o.getOldMode(), copy.getOldMode());
assertEquals(n.getNewMode(), copy.getNewMode());
assertEquals(o.getOldId(), copy.getOldId());
assertEquals(n.getNewId(), copy.getNewId());
assertEquals(score, copy.getScore());
}
}

View File

@ -0,0 +1,137 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import junit.framework.TestCase;
import org.eclipse.jgit.lib.Constants;
public class SimilarityIndexTest extends TestCase {
public void testIndexing() {
SimilarityIndex si = hash("" //
+ "A\n" //
+ "B\n" //
+ "D\n" //
+ "B\n" //
);
int key_A = keyFor("A\n");
int key_B = keyFor("B\n");
int key_D = keyFor("D\n");
assertTrue(key_A != key_B && key_A != key_D && key_B != key_D);
assertEquals(3, si.size());
assertEquals(2, si.count(si.findIndex(key_A)));
assertEquals(4, si.count(si.findIndex(key_B)));
assertEquals(2, si.count(si.findIndex(key_D)));
}
public void testCommonScore_SameFiles() {
String text = "" //
+ "A\n" //
+ "B\n" //
+ "D\n" //
+ "B\n";
SimilarityIndex src = hash(text);
SimilarityIndex dst = hash(text);
assertEquals(8, src.common(dst));
assertEquals(8, dst.common(src));
assertEquals(100, src.score(dst, 100));
assertEquals(100, dst.score(src, 100));
}
public void testCommonScore_EmptyFiles() {
SimilarityIndex src = hash("");
SimilarityIndex dst = hash("");
assertEquals(0, src.common(dst));
assertEquals(0, dst.common(src));
}
public void testCommonScore_TotallyDifferentFiles() {
SimilarityIndex src = hash("A\n");
SimilarityIndex dst = hash("D\n");
assertEquals(0, src.common(dst));
assertEquals(0, dst.common(src));
}
public void testCommonScore_SimiliarBy75() {
SimilarityIndex src = hash("A\nB\nC\nD\n");
SimilarityIndex dst = hash("A\nB\nC\nQ\n");
assertEquals(6, src.common(dst));
assertEquals(6, dst.common(src));
assertEquals(75, src.score(dst, 100));
assertEquals(75, dst.score(src, 100));
}
private static SimilarityIndex hash(String text) {
SimilarityIndex src = new SimilarityIndex() {
@Override
void hash(byte[] raw, int ptr, final int end) {
while (ptr < end) {
int hash = raw[ptr] & 0xff;
int start = ptr;
do {
int c = raw[ptr++] & 0xff;
if (c == '\n')
break;
} while (ptr < end && ptr - start < 64);
add(hash, ptr - start);
}
}
};
byte[] raw = Constants.encode(text);
src.setFileSize(raw.length);
src.hash(raw, 0, raw.length);
src.sort();
return src;
}
private static int keyFor(String line) {
SimilarityIndex si = hash(line);
assertEquals("single line scored", 1, si.size());
return si.key(0);
}
}

View File

@ -45,6 +45,7 @@
import junit.framework.TestCase;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
@ -153,7 +154,7 @@ public void testParseUnicodeName_NewFile() {
assertParse(fh);
assertEquals("/dev/null", fh.getOldName());
assertSame(FileHeader.DEV_NULL, fh.getOldName());
assertSame(DiffEntry.DEV_NULL, fh.getOldName());
assertEquals("\u00c5ngstr\u00f6m", fh.getNewName());
assertSame(FileHeader.ChangeType.ADD, fh.getChangeType());
@ -179,7 +180,7 @@ public void testParseUnicodeName_DeleteFile() {
assertEquals("\u00c5ngstr\u00f6m", fh.getOldName());
assertEquals("/dev/null", fh.getNewName());
assertSame(FileHeader.DEV_NULL, fh.getNewName());
assertSame(DiffEntry.DEV_NULL, fh.getNewName());
assertSame(FileHeader.ChangeType.DELETE, fh.getChangeType());
assertSame(FileHeader.PatchType.UNIFIED, fh.getPatchType());

View File

@ -46,10 +46,11 @@
import java.io.IOException;
import java.io.InputStream;
import org.eclipse.jgit.lib.FileMode;
import junit.framework.TestCase;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.lib.FileMode;
public class PatchCcTest extends TestCase {
public void testParse_OneFileCc() throws IOException {
final Patch p = parseTestPatchFile();
@ -113,7 +114,7 @@ public void testParse_CcNewFile() throws IOException {
final CombinedFileHeader cfh = (CombinedFileHeader) p.getFiles().get(0);
assertSame(FileHeader.DEV_NULL, cfh.getOldName());
assertSame(DiffEntry.DEV_NULL, cfh.getOldName());
assertEquals("d", cfh.getNewName());
assertEquals(187, cfh.startOffset);
@ -168,7 +169,7 @@ public void testParse_CcDeleteFile() throws IOException {
final CombinedFileHeader cfh = (CombinedFileHeader) p.getFiles().get(0);
assertEquals("a", cfh.getOldName());
assertSame(FileHeader.DEV_NULL, cfh.getNewName());
assertSame(DiffEntry.DEV_NULL, cfh.getNewName());
assertEquals(187, cfh.startOffset);

View File

@ -6,6 +6,7 @@ JRELacksMD5Implementation=JRE lacks MD5 implementation
URINotSupported=URI not supported: {0}
URLNotFound={0} not found
aNewObjectIdIsRequired=A NewObjectId is required.
abbreviationLengthMustBeNonNegative=Abbreviation length must not be negative.
advertisementCameBefore=advertisement of {0}^{} came before {1}
advertisementOfCameBefore=advertisement of {0}^{} came before {1}
amazonS3ActionFailed={0} of '{1}' failed: {2} {3}
@ -299,7 +300,11 @@ remoteDoesNotHaveSpec=Remote does not have {0} available for fetch.
remoteDoesNotSupportSmartHTTPPush=remote does not support smart HTTP push
remoteHungUpUnexpectedly=remote hung up unexpectedly
remoteNameCantBeNull=Remote name can't be null.
renamesAlreadyFound=Renames have already been found.
renamesFindingByContent=Finding renames by content similarity
renamesFindingExact=Finding exact renames
repositoryAlreadyExists=Repository already exists: {0}
repositoryIsRequired=Repository is required.
repositoryNotFound=repository not found: {0}
repositoryState_applyMailbox=Apply mailbox
repositoryState_bisecting=Bisecting
@ -317,6 +322,7 @@ shortCompressedStreamAt=Short compressed stream at {0}
shortReadOfBlock=Short read of block.
shortReadOfOptionalDIRCExtensionExpectedAnotherBytes=Short read of optional DIRC extension {0}; expected another {1} bytes within the section.
shortSkipOfBlock=Short skip of block.
similarityScoreMustBeWithinBounds=Similarity score must be between 0 and 100.
smartHTTPPushDisabled=smart HTTP push disabled
sourceDestinationMustMatch=Source/Destination must match.
sourceIsNotAWildcard=Source is not a wildcard.

View File

@ -66,6 +66,7 @@ public static JGitText get() {
/***/ public String URINotSupported;
/***/ public String URLNotFound;
/***/ public String aNewObjectIdIsRequired;
/***/ public String abbreviationLengthMustBeNonNegative;
/***/ public String advertisementCameBefore;
/***/ public String advertisementOfCameBefore;
/***/ public String amazonS3ActionFailed;
@ -358,7 +359,11 @@ public static JGitText get() {
/***/ public String remoteDoesNotSupportSmartHTTPPush;
/***/ public String remoteHungUpUnexpectedly;
/***/ public String remoteNameCantBeNull;
/***/ public String renamesAlreadyFound;
/***/ public String renamesFindingByContent;
/***/ public String renamesFindingExact;
/***/ public String repositoryAlreadyExists;
/***/ public String repositoryIsRequired;
/***/ public String repositoryNotFound;
/***/ public String repositoryState_applyMailbox;
/***/ public String repositoryState_bisecting;
@ -376,6 +381,7 @@ public static JGitText get() {
/***/ public String shortReadOfBlock;
/***/ public String shortReadOfOptionalDIRCExtensionExpectedAnotherBytes;
/***/ public String shortSkipOfBlock;
/***/ public String similarityScoreMustBeWithinBounds;
/***/ public String smartHTTPPushDisabled;
/***/ public String sourceDestinationMustMatch;
/***/ public String sourceIsNotAWildcard;

View File

@ -0,0 +1,68 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.Config.SectionParser;
/** Keeps track of diff related configuration options. */
public class DiffConfig {
/** Key for {@link Config#get(SectionParser)}. */
public static final Config.SectionParser<DiffConfig> KEY = new SectionParser<DiffConfig>() {
public DiffConfig parse(final Config cfg) {
return new DiffConfig(cfg);
}
};
private final int renameLimit;
private DiffConfig(final Config rc) {
renameLimit = rc.getInt("diff", "renamelimit", 200);
}
/** @return limit on number of paths to perform inexact rename detection. */
public int getRenameLimit() {
return renameLimit;
}
}

View File

@ -0,0 +1,345 @@
/*
* Copyright (C) 2008-2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.treewalk.TreeWalk;
/** A value class representing a change to a file */
public class DiffEntry {
private static final AbbreviatedObjectId A_ZERO = AbbreviatedObjectId
.fromObjectId(ObjectId.zeroId());
/** Magical file name used for file adds or deletes. */
public static final String DEV_NULL = "/dev/null";
/** General type of change a single file-level patch describes. */
public static enum ChangeType {
/** Add a new file to the project */
ADD,
/** Modify an existing file in the project (content and/or mode) */
MODIFY,
/** Delete an existing file from the project */
DELETE,
/** Rename an existing file to a new location */
RENAME,
/** Copy an existing file to a new location, keeping the original */
COPY;
}
/**
* Create an empty DiffEntry
*/
protected DiffEntry(){
// reduce the visibility of the default constructor
}
/**
* Convert the TreeWalk into DiffEntry headers.
*
* @param walk
* the TreeWalk to walk through. Must have exactly two trees.
* @return headers describing the changed files.
* @throws IOException
* the repository cannot be accessed.
*/
public static List<DiffEntry> scan(TreeWalk walk) throws IOException {
List<DiffEntry> r = new ArrayList<DiffEntry>();
MutableObjectId idBuf = new MutableObjectId();
while (walk.next()) {
DiffEntry entry = new DiffEntry();
walk.getObjectId(idBuf, 0);
entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf);
walk.getObjectId(idBuf, 1);
entry.newId = AbbreviatedObjectId.fromObjectId(idBuf);
entry.oldMode = walk.getFileMode(0);
entry.newMode = walk.getFileMode(1);
entry.newName = entry.oldName = walk.getPathString();
if (entry.oldMode == FileMode.MISSING) {
entry.oldName = DiffEntry.DEV_NULL;
entry.changeType = ChangeType.ADD;
r.add(entry);
} else if (entry.newMode == FileMode.MISSING) {
entry.newName = DiffEntry.DEV_NULL;
entry.changeType = ChangeType.DELETE;
r.add(entry);
} else {
entry.changeType = ChangeType.MODIFY;
if (RenameDetector.sameType(entry.oldMode, entry.newMode))
r.add(entry);
else
r.addAll(breakModify(entry));
}
}
return r;
}
static DiffEntry add(String path, AnyObjectId id) {
DiffEntry e = new DiffEntry();
e.oldId = A_ZERO;
e.oldMode = FileMode.MISSING;
e.oldName = DEV_NULL;
e.newId = AbbreviatedObjectId.fromObjectId(id);
e.newMode = FileMode.REGULAR_FILE;
e.newName = path;
e.changeType = ChangeType.ADD;
return e;
}
static DiffEntry delete(String path, AnyObjectId id) {
DiffEntry e = new DiffEntry();
e.oldId = AbbreviatedObjectId.fromObjectId(id);
e.oldMode = FileMode.REGULAR_FILE;
e.oldName = path;
e.newId = A_ZERO;
e.newMode = FileMode.MISSING;
e.newName = DEV_NULL;
e.changeType = ChangeType.DELETE;
return e;
}
static DiffEntry modify(String path) {
DiffEntry e = new DiffEntry();
e.oldMode = FileMode.REGULAR_FILE;
e.oldName = path;
e.newMode = FileMode.REGULAR_FILE;
e.newName = path;
e.changeType = ChangeType.MODIFY;
return e;
}
static List<DiffEntry> breakModify(DiffEntry entry) {
DiffEntry del = new DiffEntry();
del.oldId = entry.getOldId();
del.oldMode = entry.getOldMode();
del.oldName = entry.getOldName();
del.newId = A_ZERO;
del.newMode = FileMode.MISSING;
del.newName = DiffEntry.DEV_NULL;
del.changeType = ChangeType.DELETE;
DiffEntry add = new DiffEntry();
add.oldId = A_ZERO;
add.oldMode = FileMode.MISSING;
add.oldName = DiffEntry.DEV_NULL;
add.newId = entry.getNewId();
add.newMode = entry.getNewMode();
add.newName = entry.getNewName();
add.changeType = ChangeType.ADD;
return Arrays.asList(del, add);
}
static DiffEntry pair(ChangeType changeType, DiffEntry src, DiffEntry dst,
int score) {
DiffEntry r = new DiffEntry();
r.oldId = src.oldId;
r.oldMode = src.oldMode;
r.oldName = src.oldName;
r.newId = dst.newId;
r.newMode = dst.newMode;
r.newName = dst.newName;
r.changeType = changeType;
r.score = score;
return r;
}
/** File name of the old (pre-image). */
protected String oldName;
/** File name of the new (post-image). */
protected String newName;
/** Old mode of the file, if described by the patch, else null. */
protected FileMode oldMode;
/** New mode of the file, if described by the patch, else null. */
protected FileMode newMode;
/** General type of change indicated by the patch. */
protected ChangeType changeType;
/** Similarity score if {@link #changeType} is a copy or rename. */
protected int score;
/** ObjectId listed on the index line for the old (pre-image) */
protected AbbreviatedObjectId oldId;
/** ObjectId listed on the index line for the new (post-image) */
protected AbbreviatedObjectId newId;
/**
* Get the old name associated with this file.
* <p>
* The meaning of the old name can differ depending on the semantic meaning
* of this patch:
* <ul>
* <li><i>file add</i>: always <code>/dev/null</code></li>
* <li><i>file modify</i>: always {@link #getNewName()}</li>
* <li><i>file delete</i>: always the file being deleted</li>
* <li><i>file copy</i>: source file the copy originates from</li>
* <li><i>file rename</i>: source file the rename originates from</li>
* </ul>
*
* @return old name for this file.
*/
public String getOldName() {
return oldName;
}
/**
* Get the new name associated with this file.
* <p>
* The meaning of the new name can differ depending on the semantic meaning
* of this patch:
* <ul>
* <li><i>file add</i>: always the file being created</li>
* <li><i>file modify</i>: always {@link #getOldName()}</li>
* <li><i>file delete</i>: always <code>/dev/null</code></li>
* <li><i>file copy</i>: destination file the copy ends up at</li>
* <li><i>file rename</i>: destination file the rename ends up at/li>
* </ul>
*
* @return new name for this file.
*/
public String getNewName() {
return newName;
}
/** @return the old file mode, if described in the patch */
public FileMode getOldMode() {
return oldMode;
}
/** @return the new file mode, if described in the patch */
public FileMode getNewMode() {
return newMode;
}
/** @return the type of change this patch makes on {@link #getNewName()} */
public ChangeType getChangeType() {
return changeType;
}
/**
* @return similarity score between {@link #getOldName()} and
* {@link #getNewName()} if {@link #getChangeType()} is
* {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
*/
public int getScore() {
return score;
}
/**
* Get the old object id from the <code>index</code>.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getOldId() {
return oldId;
}
/**
* Get the new object id from the <code>index</code>.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getNewId() {
return newId;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("DiffEntry[");
buf.append(changeType);
buf.append(" ");
switch (changeType) {
case ADD:
buf.append(newName);
break;
case COPY:
buf.append(oldName + "->" + newName);
break;
case DELETE:
buf.append(oldName);
break;
case MODIFY:
buf.append(oldName);
break;
case RENAME:
buf.append(oldName + "->" + newName);
break;
}
buf.append("]");
return buf.toString();
}
}

View File

@ -44,14 +44,28 @@
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.lib.Constants.encode;
import static org.eclipse.jgit.lib.Constants.encodeASCII;
import static org.eclipse.jgit.lib.FileMode.GITLINK;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.patch.FileHeader;
import org.eclipse.jgit.patch.HunkHeader;
import org.eclipse.jgit.patch.FileHeader.PatchType;
import org.eclipse.jgit.util.QuotedString;
import org.eclipse.jgit.util.io.DisabledOutputStream;
/**
* Format an {@link EditList} as a Git style unified patch script.
@ -59,11 +73,43 @@
public class DiffFormatter {
private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n");
private final OutputStream out;
private Repository db;
private int context;
/** Create a new formatter with a default level of context. */
public DiffFormatter() {
private int abbreviationLength;
private RawText.Factory rawTextFactory = RawText.FACTORY;
/**
* Create a new formatter with a default level of context.
*
* @param out
* the stream the formatter will write line data to. This stream
* should have buffering arranged by the caller, as many small
* writes are performed to it.
*/
public DiffFormatter(OutputStream out) {
this.out = out;
setContext(3);
setAbbreviationLength(8);
}
/** @return the stream we are outputting data to. */
protected OutputStream getOutputStream() {
return out;
}
/**
* Set the repository the formatter can load object contents from.
*
* @param repository
* source repository holding referenced objects.
*/
public void setRepository(Repository repository) {
db = repository;
}
/**
@ -76,10 +122,210 @@ public DiffFormatter() {
*/
public void setContext(final int lineCount) {
if (lineCount < 0)
throw new IllegalArgumentException(JGitText.get().contextMustBeNonNegative);
throw new IllegalArgumentException(
JGitText.get().contextMustBeNonNegative);
context = lineCount;
}
/**
* Change the number of digits to show in an ObjectId.
*
* @param count
* number of digits to show in an ObjectId.
*/
public void setAbbreviationLength(final int count) {
if (count < 0)
throw new IllegalArgumentException(
JGitText.get().abbreviationLengthMustBeNonNegative);
abbreviationLength = count;
}
/**
* Set the helper that constructs difference output.
*
* @param type
* the factory to create different output. Different types of
* factories can produce different whitespace behavior, for
* example.
* @see RawText#FACTORY
* @see RawTextIgnoreAllWhitespace#FACTORY
* @see RawTextIgnoreLeadingWhitespace#FACTORY
* @see RawTextIgnoreTrailingWhitespace#FACTORY
* @see RawTextIgnoreWhitespaceChange#FACTORY
*/
public void setRawTextFactory(RawText.Factory type) {
rawTextFactory = type;
}
/**
* Flush the underlying output stream of this formatter.
*
* @throws IOException
* the stream's own flush method threw an exception.
*/
public void flush() throws IOException {
out.flush();
}
/**
* Format a patch script from a list of difference entries.
*
* @param entries
* entries describing the affected files.
* @throws IOException
* a file's content cannot be read, or the output stream cannot
* be written to.
*/
public void format(List<? extends DiffEntry> entries) throws IOException {
for (DiffEntry ent : entries)
format(ent);
}
/**
* Format a patch script for one file entry.
*
* @param ent
* the entry to be formatted.
* @throws IOException
* a file's content cannot be read, or the output stream cannot
* be written to.
*/
public void format(DiffEntry ent) throws IOException {
writeDiffHeader(out, ent);
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
writeGitLinkDiffText(out, ent);
} else {
byte[] aRaw = open(ent.getOldMode(), ent.getOldId());
byte[] bRaw = open(ent.getNewMode(), ent.getNewId());
if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
out.write(encodeASCII("Binary files differ\n"));
} else {
RawText a = rawTextFactory.create(aRaw);
RawText b = rawTextFactory.create(bRaw);
formatEdits(a, b, new MyersDiff(a, b).getEdits());
}
}
}
private void writeGitLinkDiffText(OutputStream o, DiffEntry ent)
throws IOException {
if (ent.getOldMode() == GITLINK) {
o.write(encodeASCII("-Subproject commit " + ent.getOldId().name()
+ "\n"));
}
if (ent.getNewMode() == GITLINK) {
o.write(encodeASCII("+Subproject commit " + ent.getNewId().name()
+ "\n"));
}
}
private void writeDiffHeader(OutputStream o, DiffEntry ent)
throws IOException {
String oldName = quotePath("a/" + ent.getOldName());
String newName = quotePath("b/" + ent.getNewName());
o.write(encode("diff --git " + oldName + " " + newName + "\n"));
switch (ent.getChangeType()) {
case ADD:
o.write(encodeASCII("new file mode "));
ent.getNewMode().copyTo(o);
o.write('\n');
break;
case DELETE:
o.write(encodeASCII("deleted file mode "));
ent.getOldMode().copyTo(o);
o.write('\n');
break;
case RENAME:
o.write(encodeASCII("similarity index " + ent.getScore() + "%"));
o.write('\n');
o.write(encode("rename from " + quotePath(ent.getOldName())));
o.write('\n');
o.write(encode("rename to " + quotePath(ent.getNewName())));
o.write('\n');
break;
case COPY:
o.write(encodeASCII("similarity index " + ent.getScore() + "%"));
o.write('\n');
o.write(encode("copy from " + quotePath(ent.getOldName())));
o.write('\n');
o.write(encode("copy to " + quotePath(ent.getNewName())));
o.write('\n');
if (!ent.getOldMode().equals(ent.getNewMode())) {
o.write(encodeASCII("new file mode "));
ent.getNewMode().copyTo(o);
o.write('\n');
}
break;
}
switch (ent.getChangeType()) {
case RENAME:
case MODIFY:
if (!ent.getOldMode().equals(ent.getNewMode())) {
o.write(encodeASCII("old mode "));
ent.getOldMode().copyTo(o);
o.write('\n');
o.write(encodeASCII("new mode "));
ent.getNewMode().copyTo(o);
o.write('\n');
}
}
o.write(encodeASCII("index " //
+ format(ent.getOldId()) //
+ ".." //
+ format(ent.getNewId())));
if (ent.getOldMode().equals(ent.getNewMode())) {
o.write(' ');
ent.getNewMode().copyTo(o);
}
o.write('\n');
o.write(encode("--- " + oldName + '\n'));
o.write(encode("+++ " + newName + '\n'));
}
private String format(AbbreviatedObjectId oldId) {
if (oldId.isComplete() && db != null)
oldId = oldId.toObjectId().abbreviate(db, abbreviationLength);
return oldId.name();
}
private static String quotePath(String name) {
String q = QuotedString.GIT_PATH.quote(name);
return ('"' + name + '"').equals(q) ? name : q;
}
private byte[] open(FileMode mode, AbbreviatedObjectId id)
throws IOException {
if (mode == FileMode.MISSING)
return new byte[] {};
if (mode.getObjectType() != Constants.OBJ_BLOB)
return new byte[] {};
if (db == null)
throw new IllegalStateException(JGitText.get().repositoryIsRequired);
if (id.isComplete()) {
ObjectLoader ldr = db.openObject(id.toObjectId());
return ldr.getCachedBytes();
}
return new byte[] {};
}
/**
* Format a patch script, reusing a previously parsed FileHeader.
* <p>
@ -87,8 +333,6 @@ public void setContext(final int lineCount) {
* to increase or reduce the number of lines of context within the script.
* All header lines are reused as-is from the supplied FileHeader.
*
* @param out
* stream to write the patch script out to.
* @param head
* existing file header containing the header lines to copy.
* @param a
@ -100,8 +344,8 @@ public void setContext(final int lineCount) {
* @throws IOException
* writing to the supplied stream failed.
*/
public void format(final OutputStream out, final FileHeader head,
final RawText a, final RawText b) throws IOException {
public void format(final FileHeader head, final RawText a, final RawText b)
throws IOException {
// Reuse the existing FileHeader as-is by blindly copying its
// header lines, but avoiding its hunks. Instead we recreate
// the hunks from the text instances we have been supplied.
@ -112,19 +356,22 @@ public void format(final OutputStream out, final FileHeader head,
end = head.getHunks().get(0).getStartOffset();
out.write(head.getBuffer(), start, end - start);
formatEdits(out, a, b, head.toEditList());
formatEdits(a, b, head.toEditList());
}
/**
* Formats a list of edits in unified diff format
* @param out where the unified diff is written to
* @param a the text A which was compared
* @param b the text B which was compared
* @param edits some differences which have been calculated between A and B
*
* @param a
* the text A which was compared
* @param b
* the text B which was compared
* @param edits
* some differences which have been calculated between A and B
* @throws IOException
*/
public void formatEdits(final OutputStream out, final RawText a,
final RawText b, final EditList edits) throws IOException {
public void formatEdits(final RawText a, final RawText b,
final EditList edits) throws IOException {
for (int curIdx = 0; curIdx < edits.size();) {
Edit curEdit = edits.get(curIdx);
final int endIdx = findCombinedEnd(edits, curIdx);
@ -135,18 +382,24 @@ public void formatEdits(final OutputStream out, final RawText a,
final int aEnd = Math.min(a.size(), endEdit.getEndA() + context);
final int bEnd = Math.min(b.size(), endEdit.getEndB() + context);
writeHunkHeader(out, aCur, aEnd, bCur, bEnd);
writeHunkHeader(aCur, aEnd, bCur, bEnd);
while (aCur < aEnd || bCur < bEnd) {
if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) {
writeContextLine(out, a, aCur, isEndOfLineMissing(a, aCur));
writeContextLine(a, aCur);
if (isEndOfLineMissing(a, aCur))
out.write(noNewLine);
aCur++;
bCur++;
} else if (aCur < curEdit.getEndA()) {
writeRemovedLine(out, a, aCur, isEndOfLineMissing(a, aCur));
writeRemovedLine(a, aCur);
if (isEndOfLineMissing(a, aCur))
out.write(noNewLine);
aCur++;
} else if (bCur < curEdit.getEndB()) {
writeAddedLine(out, b, bCur, isEndOfLineMissing(b, bCur));
writeAddedLine(b, bCur);
if (isEndOfLineMissing(b, bCur))
out.write(noNewLine);
bCur++;
}
@ -157,21 +410,17 @@ public void formatEdits(final OutputStream out, final RawText a,
}
/**
* Output a line of diff context
* Output a line of context (unmodified line).
*
* @param out
* OutputStream
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @param endOfLineMissing
* true if we should add the GNU end of line missing warning
* @throws IOException
*/
protected void writeContextLine(final OutputStream out, final RawText text,
final int line, boolean endOfLineMissing) throws IOException {
writeLine(out, ' ', text, line, endOfLineMissing);
protected void writeContextLine(final RawText text, final int line)
throws IOException {
writeLine(' ', text, line);
}
private boolean isEndOfLineMissing(final RawText text, final int line) {
@ -179,46 +428,36 @@ private boolean isEndOfLineMissing(final RawText text, final int line) {
}
/**
* Output an added line
* Output an added line.
*
* @param out
* OutputStream
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @param endOfLineMissing
* true if we should add the gnu end of line missing warning
* @throws IOException
*/
protected void writeAddedLine(final OutputStream out, final RawText text, final int line, boolean endOfLineMissing)
protected void writeAddedLine(final RawText text, final int line)
throws IOException {
writeLine(out, '+', text, line, endOfLineMissing);
writeLine('+', text, line);
}
/**
* Output a removed line
*
* @param out
* OutputStream
* @param text
* RawText for accessing raw data
* @param line
* the line number within text
* @param endOfLineMissing
* true if we should add the gnu end of line missing warning
* @throws IOException
*/
protected void writeRemovedLine(final OutputStream out, final RawText text,
final int line, boolean endOfLineMissing) throws IOException {
writeLine(out, '-', text, line, endOfLineMissing);
protected void writeRemovedLine(final RawText text, final int line)
throws IOException {
writeLine('-', text, line);
}
/**
* Output a hunk header
*
* @param out
* OutputStream
* @param aStartLine
* within first source
* @param aEndLine
@ -229,20 +468,20 @@ protected void writeRemovedLine(final OutputStream out, final RawText text,
* within second source
* @throws IOException
*/
protected void writeHunkHeader(final OutputStream out, int aStartLine, int aEndLine,
protected void writeHunkHeader(int aStartLine, int aEndLine,
int bStartLine, int bEndLine) throws IOException {
out.write('@');
out.write('@');
writeRange(out, '-', aStartLine + 1, aEndLine - aStartLine);
writeRange(out, '+', bStartLine + 1, bEndLine - bStartLine);
writeRange('-', aStartLine + 1, aEndLine - aStartLine);
writeRange('+', bStartLine + 1, bEndLine - bStartLine);
out.write(' ');
out.write('@');
out.write('@');
out.write('\n');
}
private static void writeRange(final OutputStream out, final char prefix,
final int begin, final int cnt) throws IOException {
private void writeRange(final char prefix, final int begin, final int cnt)
throws IOException {
out.write(' ');
out.write(prefix);
switch (cnt) {
@ -271,18 +510,75 @@ private static void writeRange(final OutputStream out, final char prefix,
}
}
private static void writeLine(final OutputStream out, final char prefix,
final RawText text, final int cur, boolean noNewLineIndicator) throws IOException {
/**
* Write a standard patch script line.
*
* @param prefix
* prefix before the line, typically '-', '+', ' '.
* @param text
* the text object to obtain the line from.
* @param cur
* line number to output.
* @throws IOException
* the stream threw an exception while writing to it.
*/
protected void writeLine(final char prefix, final RawText text,
final int cur) throws IOException {
out.write(prefix);
text.writeLine(out, cur);
out.write('\n');
if (noNewLineIndicator)
writeNoNewLine(out);
}
private static void writeNoNewLine(final OutputStream out)
throws IOException {
out.write(noNewLine);
/**
* Creates a {@link FileHeader} representing the given {@link DiffEntry}
* <p>
* This method does not use the OutputStream associated with this
* DiffFormatter instance. It is therefore safe to instantiate this
* DiffFormatter instance with a {@link DisabledOutputStream} if this method
* is the only one that will be used.
*
* @param ent
* the DiffEntry to create the FileHeader for
* @return a FileHeader representing the DiffEntry. The FileHeader's buffer
* will contain only the header of the diff output. It will also
* contain one {@link HunkHeader}.
* @throws IOException
* the stream threw an exception while writing to it, or one of
* the blobs referenced by the DiffEntry could not be read.
* @throws CorruptObjectException
* one of the blobs referenced by the DiffEntry is corrupt.
* @throws MissingObjectException
* one of the blobs referenced by the DiffEntry is missing.
*/
public FileHeader createFileHeader(DiffEntry ent) throws IOException,
CorruptObjectException, MissingObjectException {
ByteArrayOutputStream buf = new ByteArrayOutputStream();
final EditList editList;
final FileHeader.PatchType type;
writeDiffHeader(buf, ent);
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
writeGitLinkDiffText(buf, ent);
editList = new EditList();
type = PatchType.UNIFIED;
} else {
byte[] aRaw = open(ent.getOldMode(), ent.getOldId());
byte[] bRaw = open(ent.getNewMode(), ent.getNewId());
if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
buf.write(encodeASCII("Binary files differ\n"));
editList = new EditList();
type = PatchType.BINARY;
} else {
RawText a = rawTextFactory.create(aRaw);
RawText b = rawTextFactory.create(bRaw);
editList = new MyersDiff(a, b).getEdits();
type = PatchType.UNIFIED;
}
}
return new FileHeader(buf.toByteArray(), editList, type);
}
private int findCombinedEnd(final List<Edit> edits, final int i) {

View File

@ -65,6 +65,25 @@
* they are converting from "line number" to "element index".
*/
public class RawText implements Sequence {
/** Creates a RawText instance. */
public static interface Factory {
/**
* Construct a RawText instance for the content.
*
* @param input
* the content array.
* @return a RawText instance wrapping this content.
*/
RawText create(byte[] input);
}
/** Creates RawText that does not treat whitespace specially. */
public static final Factory FACTORY = new Factory() {
public RawText create(byte[] input) {
return new RawText(input);
}
};
/** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */
private static final int FIRST_FEW_BYTES = 8000;

View File

@ -51,6 +51,13 @@
* A version of {@link RawText} that ignores all whitespace.
*/
public class RawTextIgnoreAllWhitespace extends RawText {
/** Creates RawText that ignores all whitespace. */
@SuppressWarnings("hiding")
public static final Factory FACTORY = new Factory() {
public RawText create(byte[] input) {
return new RawTextIgnoreAllWhitespace(input);
}
};
/**
* Create a new sequence from an existing content byte array.

View File

@ -50,6 +50,13 @@
* A version of {@link RawText} that ignores leading whitespace.
*/
public class RawTextIgnoreLeadingWhitespace extends RawText {
/** Creates RawText that ignores only leading whitespace. */
@SuppressWarnings("hiding")
public static final Factory FACTORY = new Factory() {
public RawText create(byte[] input) {
return new RawTextIgnoreLeadingWhitespace(input);
}
};
/**
* Create a new sequence from an existing content byte array.

View File

@ -50,6 +50,13 @@
* A version of {@link RawText} that ignores trailing whitespace.
*/
public class RawTextIgnoreTrailingWhitespace extends RawText {
/** Creates RawText that ignores only trailing whitespace. */
@SuppressWarnings("hiding")
public static final Factory FACTORY = new Factory() {
public RawText create(byte[] input) {
return new RawTextIgnoreTrailingWhitespace(input);
}
};
/**
* Create a new sequence from an existing content byte array.

View File

@ -45,14 +45,21 @@
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
/**
* A version of {@link RawText} that ignores changes in the amount of
* whitespace, as well as trailing whitespace.
*/
public class RawTextIgnoreWhitespaceChange extends RawText {
/** Creates RawText that ignores only whitespace changes. */
@SuppressWarnings("hiding")
public static final Factory FACTORY = new Factory() {
public RawText create(byte[] input) {
return new RawTextIgnoreWhitespaceChange(input);
}
};
/**
* Create a new sequence from an existing content byte array.

View File

@ -0,0 +1,536 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
/** Detect and resolve object renames. */
public class RenameDetector {
private static final int EXACT_RENAME_SCORE = 100;
private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
public int compare(DiffEntry a, DiffEntry b) {
int cmp = nameOf(a).compareTo(nameOf(b));
if (cmp == 0)
cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
return cmp;
}
private String nameOf(DiffEntry ent) {
// Sort by the new name, unless the change is a delete. On
// deletes the new name is /dev/null, so we sort instead by
// the old name.
//
if (ent.changeType == ChangeType.DELETE)
return ent.oldName;
return ent.newName;
}
private int sortOf(ChangeType changeType) {
// Sort deletes before adds so that a major type change for
// a file path (such as symlink to regular file) will first
// remove the path, then add it back with the new type.
//
switch (changeType) {
case DELETE:
return 1;
case ADD:
return 2;
default:
return 10;
}
}
};
private final List<DiffEntry> entries = new ArrayList<DiffEntry>();
private List<DiffEntry> deleted = new ArrayList<DiffEntry>();
private List<DiffEntry> added = new ArrayList<DiffEntry>();
private boolean done;
private final Repository repo;
/** Similarity score required to pair an add/delete as a rename. */
private int renameScore = 60;
/** Limit in the number of files to consider for renames. */
private int renameLimit;
/** Set if the number of adds or deletes was over the limit. */
private boolean overRenameLimit;
/**
* Create a new rename detector for the given repository
*
* @param repo
* the repository to use for rename detection
*/
public RenameDetector(Repository repo) {
this.repo = repo;
DiffConfig cfg = repo.getConfig().get(DiffConfig.KEY);
renameLimit = cfg.getRenameLimit();
}
/**
* @return minimum score required to pair an add/delete as a rename. The
* score ranges are within the bounds of (0, 100).
*/
public int getRenameScore() {
return renameScore;
}
/**
* Set the minimum score required to pair an add/delete as a rename.
* <p>
* When comparing two files together their score must be greater than or
* equal to the rename score for them to be considered a rename match. The
* score is computed based on content similarity, so a score of 60 implies
* that approximately 60% of the bytes in the files are identical.
*
* @param score
* new rename score, must be within [0, 100].
* @throws IllegalArgumentException
* the score was not within [0, 100].
*/
public void setRenameScore(int score) {
if (score < 0 || score > 100)
throw new IllegalArgumentException(
JGitText.get().similarityScoreMustBeWithinBounds);
renameScore = score;
}
/** @return limit on number of paths to perform inexact rename detection. */
public int getRenameLimit() {
return renameLimit;
}
/**
* Set the limit on the number of files to perform inexact rename detection.
* <p>
* The rename detector has to build a square matrix of the rename limit on
* each side, then perform that many file compares to determine similarity.
* If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix
* must be allocated, and 1,000,000 file compares may need to be performed.
*
* @param limit
* new file limit.
*/
public void setRenameLimit(int limit) {
renameLimit = limit;
}
/**
* Check if the detector is over the rename limit.
* <p>
* This method can be invoked either before or after {@code getEntries} has
* been used to perform rename detection.
*
* @return true if the detector has more file additions or removals than the
* rename limit is currently set to. In such configurations the
* detector will skip expensive computation.
*/
public boolean isOverRenameLimit() {
if (done)
return overRenameLimit;
int cnt = Math.max(added.size(), deleted.size());
return getRenameLimit() != 0 && getRenameLimit() < cnt;
}
/**
* Add entries to be considered for rename detection.
*
* @param entriesToAdd
* one or more entries to add.
* @throws IllegalStateException
* if {@code getEntries} was already invoked.
*/
public void addAll(Collection<DiffEntry> entriesToAdd) {
if (done)
throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
for (DiffEntry entry : entriesToAdd) {
switch (entry.getChangeType()) {
case ADD:
added.add(entry);
break;
case DELETE:
deleted.add(entry);
break;
case MODIFY:
if (sameType(entry.getOldMode(), entry.getNewMode()))
entries.add(entry);
else
entries.addAll(DiffEntry.breakModify(entry));
break;
case COPY:
case RENAME:
default:
entriesToAdd.add(entry);
}
}
}
/**
* Add an entry to be considered for rename detection.
*
* @param entry
* to add.
* @throws IllegalStateException
* if {@code getEntries} was already invoked.
*/
public void add(DiffEntry entry) {
addAll(Collections.singletonList(entry));
}
/**
* Detect renames in the current file set.
* <p>
* This convenience function runs without a progress monitor.
*
* @return an unmodifiable list of {@link DiffEntry}s representing all files
* that have been changed.
* @throws IOException
* file contents cannot be read from the repository.
*/
public List<DiffEntry> compute() throws IOException {
return compute(NullProgressMonitor.INSTANCE);
}
/**
* Detect renames in the current file set.
*
* @param pm
* report progress during the detection phases.
* @return an unmodifiable list of {@link DiffEntry}s representing all files
* that have been changed.
* @throws IOException
* file contents cannot be read from the repository.
*/
public List<DiffEntry> compute(ProgressMonitor pm) throws IOException {
if (!done) {
done = true;
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
findExactRenames(pm);
findContentRenames(pm);
entries.addAll(added);
added = null;
entries.addAll(deleted);
deleted = null;
Collections.sort(entries, DIFF_COMPARATOR);
}
return Collections.unmodifiableList(entries);
}
private void findContentRenames(ProgressMonitor pm) throws IOException {
int cnt = Math.max(added.size(), deleted.size());
if (cnt == 0)
return;
if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
SimilarityRenameDetector d;
d = new SimilarityRenameDetector(repo, deleted, added);
d.setRenameScore(getRenameScore());
d.compute(pm);
deleted = d.getLeftOverSources();
added = d.getLeftOverDestinations();
entries.addAll(d.getMatches());
} else {
overRenameLimit = true;
}
}
@SuppressWarnings("unchecked")
private void findExactRenames(ProgressMonitor pm) {
if (added.isEmpty() || deleted.isEmpty())
return;
pm.beginTask(JGitText.get().renamesFindingExact, //
added.size() + added.size() + deleted.size()
+ added.size() * deleted.size());
HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
ArrayList<DiffEntry> uniqueAdds = new ArrayList<DiffEntry>(added.size());
ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<List<DiffEntry>>();
for (Object o : addedMap.values()) {
if (o instanceof DiffEntry)
uniqueAdds.add((DiffEntry) o);
else
nonUniqueAdds.add((List<DiffEntry>) o);
}
ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size());
for (DiffEntry a : uniqueAdds) {
Object del = deletedMap.get(a.newId);
if (del instanceof DiffEntry) {
// We have one add to one delete: pair them if they are the same
// type
DiffEntry e = (DiffEntry) del;
if (sameType(e.oldMode, a.newMode)) {
e.changeType = ChangeType.RENAME;
entries.add(exactRename(e, a));
} else {
left.add(a);
}
} else if (del != null) {
// We have one add to many deletes: find the delete with the
// same type and closest name to the add, then pair them
List<DiffEntry> list = (List<DiffEntry>) del;
DiffEntry best = bestPathMatch(a, list);
if (best != null) {
best.changeType = ChangeType.RENAME;
entries.add(exactRename(best, a));
} else {
left.add(a);
}
} else {
left.add(a);
}
pm.update(1);
}
for (List<DiffEntry> adds : nonUniqueAdds) {
Object o = deletedMap.get(adds.get(0).newId);
if (o instanceof DiffEntry) {
// We have many adds to one delete: find the add with the same
// type and closest name to the delete, then pair them. Mark the
// rest as copies of the delete.
DiffEntry d = (DiffEntry) o;
DiffEntry best = bestPathMatch(d, adds);
if (best != null) {
d.changeType = ChangeType.RENAME;
entries.add(exactRename(d, best));
for (DiffEntry a : adds) {
if (a != best) {
if (sameType(d.oldMode, a.newMode)) {
entries.add(exactCopy(d, a));
} else {
left.add(a);
}
}
}
} else {
left.addAll(adds);
}
} else {
// We have many adds to many deletes: score all the adds against
// all the deletes by path name, take the best matches, pair
// them as renames, then call the rest copies
List<DiffEntry> dels = (List<DiffEntry>) o;
long[] matrix = new long[dels.size() * adds.size()];
int mNext = 0;
for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
String addedName = adds.get(addIdx).newName;
for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
String deletedName = dels.get(delIdx).oldName;
int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
matrix[mNext] = SimilarityRenameDetector.encode(score, addIdx, delIdx);
mNext++;
}
}
Arrays.sort(matrix);
for (--mNext; mNext >= 0; mNext--) {
long ent = matrix[mNext];
int delIdx = SimilarityRenameDetector.srcFile(ent);
int addIdx = SimilarityRenameDetector.dstFile(ent);
DiffEntry d = dels.get(delIdx);
DiffEntry a = adds.get(addIdx);
if (a == null) {
pm.update(1);
continue; // was already matched earlier
}
ChangeType type;
if (d.changeType == ChangeType.DELETE) {
// First use of this source file. Tag it as a rename so we
// later know it is already been used as a rename, other
// matches (if any) will claim themselves as copies instead.
//
d.changeType = ChangeType.RENAME;
type = ChangeType.RENAME;
} else {
type = ChangeType.COPY;
}
entries.add(DiffEntry.pair(type, d, a, 100));
adds.set(addIdx, null); // Claim the destination was matched.
pm.update(1);
}
}
}
added = left;
deleted = new ArrayList<DiffEntry>(deletedMap.size());
for (Object o : deletedMap.values()) {
if (o instanceof DiffEntry) {
DiffEntry e = (DiffEntry) o;
if (e.changeType == ChangeType.DELETE)
deleted.add(e);
} else {
List<DiffEntry> list = (List<DiffEntry>) o;
for (DiffEntry e : list) {
if (e.changeType == ChangeType.DELETE)
deleted.add(e);
}
}
}
pm.endTask();
}
/**
* Find the best match by file path for a given DiffEntry from a list of
* DiffEntrys. The returned DiffEntry will be of the same type as <src>. If
* no DiffEntry can be found that has the same type, this method will return
* null.
*
* @param src
* the DiffEntry to try to find a match for
* @param list
* a list of DiffEntrys to search through
* @return the DiffEntry from <list> who's file path best matches <src>
*/
private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
DiffEntry best = null;
int score = -1;
for (DiffEntry d : list) {
if (sameType(mode(d), mode(src))) {
int tmp = SimilarityRenameDetector
.nameScore(path(d), path(src));
if (tmp > score) {
best = d;
score = tmp;
}
}
}
return best;
}
@SuppressWarnings("unchecked")
private HashMap<AbbreviatedObjectId, Object> populateMap(
List<DiffEntry> diffEntries, ProgressMonitor pm) {
HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
for (DiffEntry de : diffEntries) {
Object old = map.put(id(de), de);
if (old instanceof DiffEntry) {
ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
list.add((DiffEntry) old);
list.add(de);
map.put(id(de), list);
} else if (old != null) {
// Must be a list of DiffEntries
((List<DiffEntry>) old).add(de);
map.put(id(de), old);
}
pm.update(1);
}
return map;
}
private static String path(DiffEntry de) {
return de.changeType == ChangeType.DELETE ? de.oldName : de.newName;
}
private static FileMode mode(DiffEntry de) {
return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
}
private static AbbreviatedObjectId id(DiffEntry de) {
return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
}
static boolean sameType(FileMode a, FileMode b) {
// Files have to be of the same type in order to rename them.
// We would never want to rename a file to a gitlink, or a
// symlink to a file.
//
int aType = a.getBits() & FileMode.TYPE_MASK;
int bType = b.getBits() & FileMode.TYPE_MASK;
return aType == bType;
}
private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
}
private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
}
}

View File

@ -0,0 +1,297 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.util.Arrays;
import org.eclipse.jgit.lib.ObjectLoader;
/**
* Index structure of lines/blocks in one file.
* <p>
* This structure can be used to compute an approximation of the similarity
* between two files. The index is used by {@link SimilarityRenameDetector} to
* compute scores between files.
* <p>
* To save space in memory, this index uses a space efficient encoding which
* will not exceed 1 MiB per instance. The index starts out at a smaller size
* (closer to 2 KiB), but may grow as more distinct blocks within the scanned
* file are discovered.
*/
class SimilarityIndex {
/** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */
private static final int MAX_HASH_BITS = 17;
/** The {@link #idHash} table will not grow bigger than this, ever. */
private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS;
/** Prime just before {@link #MAX_HASH_SIZE}. */
private static final int P = 131071;
/**
* Shift to apply before storing a key.
* <p>
* Within the 64 bit table record space, we leave the highest bit unset so
* all values are positive, and we need {@link #MAX_HASH_BITS} bits for the
* keys. The lower 32 bits are used to count bytes impacted.
*/
private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS;
/** Total size of the file we hashed into the structure. */
private long fileSize;
/** Number of non-zero entries in {@link #idHash}. */
private int idSize;
/**
* Pairings of content keys and counters.
* <p>
* Slots in the table are actually two ints wedged into a single long. The
* upper {@link #MAX_HASH_BITS} bits stores the content key, and the
* remaining lower bits stores the number of bytes associated with that key.
* Empty slots are denoted by 0, which cannot occur because the count cannot
* be 0. Values can only be positive, which we enforce during key addition.
*/
private long[] idHash;
SimilarityIndex() {
idHash = new long[256];
}
long getFileSize() {
return fileSize;
}
void setFileSize(long size) {
fileSize = size;
}
void hash(ObjectLoader obj) {
byte[] raw = obj.getCachedBytes();
setFileSize(raw.length);
hash(raw, 0, raw.length);
}
void hash(byte[] raw, int ptr, final int end) {
while (ptr < end) {
int hash = 5381;
int start = ptr;
// Hash one line, or one block, whichever occurs first.
do {
int c = raw[ptr++] & 0xff;
if (c == '\n')
break;
hash = (hash << 5) ^ c;
} while (ptr < end && ptr - start < 64);
add(hash, ptr - start);
}
}
/**
* Sort the internal table so it can be used for efficient scoring.
* <p>
* Once sorted, additional lines/blocks cannot be added to the index.
*/
void sort() {
// Sort the array. All of the empty space will wind up at the front,
// because we forced all of the keys to always be positive. Later
// we only work with the back half of the array.
//
Arrays.sort(idHash);
}
int score(SimilarityIndex dst, int maxScore) {
long max = Math.max(fileSize, dst.fileSize);
if (max == 0)
return maxScore;
return (int) ((common(dst) * maxScore) / max);
}
int common(SimilarityIndex dst) {
return common(this, dst);
}
private static int common(SimilarityIndex src, SimilarityIndex dst) {
int srcIdx = src.packedIndex(0);
int dstIdx = dst.packedIndex(0);
long[] srcHash = src.idHash;
long[] dstHash = dst.idHash;
return common(srcHash, srcIdx, dstHash, dstIdx);
}
private static int common(long[] srcHash, int srcIdx, //
long[] dstHash, int dstIdx) {
if (srcIdx == srcHash.length || dstIdx == dstHash.length)
return 0;
int common = 0;
int srcKey = keyOf(srcHash[srcIdx]);
int dstKey = keyOf(dstHash[dstIdx]);
for (;;) {
if (srcKey == dstKey) {
common += countOf(dstHash[dstIdx]);
if (++srcIdx == srcHash.length)
break;
srcKey = keyOf(srcHash[srcIdx]);
if (++dstIdx == dstHash.length)
break;
dstKey = keyOf(dstHash[dstIdx]);
} else if (srcKey < dstKey) {
// Regions of src which do not appear in dst.
if (++srcIdx == srcHash.length)
break;
srcKey = keyOf(srcHash[srcIdx]);
} else /* if (srcKey > dstKey) */{
// Regions of dst which do not appear in dst.
if (++dstIdx == dstHash.length)
break;
dstKey = keyOf(dstHash[dstIdx]);
}
}
return common;
}
// Testing only
int size() {
return idSize;
}
// Testing only
int key(int idx) {
return keyOf(idHash[packedIndex(idx)]);
}
// Testing only
long count(int idx) {
return countOf(idHash[packedIndex(idx)]);
}
// Brute force approach only for testing.
int findIndex(int key) {
for (int i = 0; i < idSize; i++)
if (key(i) == key)
return i;
return -1;
}
private int packedIndex(int idx) {
return (idHash.length - idSize) + idx;
}
void add(int key, int cnt) {
key = hash(key);
int j = slot(key);
for (;;) {
long v = idHash[j];
if (v == 0) {
// Empty slot in the table, store here.
if (shouldGrow()) {
grow();
j = slot(key);
continue;
}
idHash[j] = (((long) key) << KEY_SHIFT) | cnt;
idSize++;
return;
} else if (keyOf(v) == key) {
// Same key, increment the counter.
idHash[j] = v + cnt;
return;
} else if (++j >= idHash.length) {
j = 0;
}
}
}
private static int hash(int key) {
// Make the key fit into our table. Since we have a maximum size
// that we cap the table at, all keys get squashed before going
// into the table. This prevents overflow.
//
return (key >>> 1) % P;
}
private int slot(int key) {
return key % idHash.length;
}
private boolean shouldGrow() {
int n = idHash.length;
return n < MAX_HASH_SIZE && n <= idSize * 2;
}
private void grow() {
long[] oldHash = idHash;
int oldSize = idHash.length;
idHash = new long[2 * oldSize];
for (int i = 0; i < oldSize; i++) {
long v = oldHash[i];
if (v != 0) {
int j = slot(keyOf(v));
while (idHash[j] != 0)
if (++j >= idHash.length)
j = 0;
idHash[j] = v;
}
}
}
private static int keyOf(long v) {
return (int) (v >>> KEY_SHIFT);
}
private static int countOf(long v) {
return (int) v;
}
}

View File

@ -0,0 +1,381 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
class SimilarityRenameDetector {
/**
* Number of bits we need to express an index into src or dst list.
* <p>
* This must be 28, giving us a limit of 2^28 entries in either list, which
* is an insane limit of 536,870,912 file names being considered in a single
* rename pass. The other 8 bits are used to store the score, while staying
* under 127 so the long doesn't go negative.
*/
private static final int BITS_PER_INDEX = 28;
private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1;
private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX;
private final Repository repo;
/**
* All sources to consider for copies or renames.
* <p>
* A source is typically a {@link ChangeType#DELETE} change, but could be
* another type when trying to perform copy detection concurrently with
* rename detection.
*/
private List<DiffEntry> srcs;
/**
* All destinations to consider looking for a rename.
* <p>
* A destination is typically an {@link ChangeType#ADD}, as the name has
* just come into existence, and we want to discover where its initial
* content came from.
*/
private List<DiffEntry> dsts;
/**
* Matrix of all examined file pairs, and their scores.
* <p>
* The upper 8 bits of each long stores the score, but the score is bounded
* to be in the range (0, 128] so that the highest bit is never set, and all
* entries are therefore positive.
* <p>
* List indexes to an element of {@link #srcs} and {@link #dsts} are encoded
* as the lower two groups of 28 bits, respectively, but the encoding is
* inverted, so that 0 is expressed as {@code (1 << 28) - 1}. This sorts
* lower list indices later in the matrix, giving precedence to files whose
* names sort earlier in the tree.
*/
private long[] matrix;
/** Score a pair must exceed to be considered a rename. */
private int renameScore = 60;
private List<DiffEntry> out;
SimilarityRenameDetector(Repository repo, List<DiffEntry> srcs,
List<DiffEntry> dsts) {
this.repo = repo;
this.srcs = srcs;
this.dsts = dsts;
}
void setRenameScore(int score) {
renameScore = score;
}
void compute(ProgressMonitor pm) throws IOException {
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
pm.beginTask(JGitText.get().renamesFindingByContent, //
2 * srcs.size() * dsts.size());
int mNext = buildMatrix(pm);
out = new ArrayList<DiffEntry>(Math.min(mNext, dsts.size()));
// Match rename pairs on a first come, first serve basis until
// we have looked at everything that is above our minimum score.
//
for (--mNext; mNext >= 0; mNext--) {
long ent = matrix[mNext];
int sIdx = srcFile(ent);
int dIdx = dstFile(ent);
DiffEntry s = srcs.get(sIdx);
DiffEntry d = dsts.get(dIdx);
if (d == null) {
pm.update(1);
continue; // was already matched earlier
}
ChangeType type;
if (s.changeType == ChangeType.DELETE) {
// First use of this source file. Tag it as a rename so we
// later know it is already been used as a rename, other
// matches (if any) will claim themselves as copies instead.
//
s.changeType = ChangeType.RENAME;
type = ChangeType.RENAME;
} else {
type = ChangeType.COPY;
}
out.add(DiffEntry.pair(type, s, d, score(ent)));
dsts.set(dIdx, null); // Claim the destination was matched.
pm.update(1);
}
srcs = compactSrcList(srcs);
dsts = compactDstList(dsts);
pm.endTask();
}
List<DiffEntry> getMatches() {
return out;
}
List<DiffEntry> getLeftOverSources() {
return srcs;
}
List<DiffEntry> getLeftOverDestinations() {
return dsts;
}
private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size());
for (DiffEntry e : in) {
if (e.changeType == ChangeType.DELETE)
r.add(e);
}
return r;
}
private static List<DiffEntry> compactDstList(List<DiffEntry> in) {
ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size());
for (DiffEntry e : in) {
if (e != null)
r.add(e);
}
return r;
}
private int buildMatrix(ProgressMonitor pm) throws IOException {
// Allocate for the worst-case scenario where every pair has a
// score that we need to consider. We might not need that many.
//
matrix = new long[srcs.size() * dsts.size()];
long[] srcSizes = new long[srcs.size()];
long[] dstSizes = new long[dsts.size()];
// Init the size arrays to some value that indicates that we haven't
// calculated the size yet. Since sizes cannot be negative, -1 will work
Arrays.fill(srcSizes, -1);
Arrays.fill(dstSizes, -1);
// Consider each pair of files, if the score is above the minimum
// threshold we need record that scoring in the matrix so we can
// later find the best matches.
//
int mNext = 0;
for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) {
DiffEntry srcEnt = srcs.get(srcIdx);
if (!isFile(srcEnt.oldMode)) {
pm.update(dsts.size());
continue;
}
SimilarityIndex s = hash(srcEnt.oldId.toObjectId());
for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
DiffEntry dstEnt = dsts.get(dstIdx);
if (!isFile(dstEnt.newMode)) {
pm.update(1);
continue;
}
if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) {
pm.update(1);
continue;
}
long srcSize = srcSizes[srcIdx];
if (srcSize < 0) {
srcSize = size(srcEnt.oldId.toObjectId());
srcSizes[srcIdx] = srcSize;
}
long dstSize = dstSizes[dstIdx];
if (dstSize < 0) {
dstSize = size(dstEnt.newId.toObjectId());
dstSizes[dstIdx] = dstSize;
}
long max = Math.max(srcSize, dstSize);
long min = Math.min(srcSize, dstSize);
if (min * 100 / max < renameScore) {
// Cannot possibly match, as the file sizes are so different
pm.update(1);
continue;
}
SimilarityIndex d = hash(dstEnt.newId.toObjectId());
int contentScore = s.score(d, 10000);
// nameScore returns a value between 0 and 100, but we want it
// to be in the same range as the content score. This allows it
// to be dropped into the pretty formula for the final score.
int nameScore = nameScore(srcEnt.oldName, dstEnt.newName) * 100;
int score = (contentScore * 99 + nameScore * 1) / 10000;
if (score < renameScore) {
pm.update(1);
continue;
}
matrix[mNext++] = encode(score, srcIdx, dstIdx);
pm.update(1);
}
}
// Sort everything in the range we populated, which might be the
// entire matrix, or just a smaller slice if we had some bad low
// scoring pairs.
//
Arrays.sort(matrix, 0, mNext);
return mNext;
}
static int nameScore(String a, String b) {
int aDirLen = a.lastIndexOf("/") + 1;
int bDirLen = b.lastIndexOf("/") + 1;
int dirMin = Math.min(aDirLen, bDirLen);
int dirMax = Math.max(aDirLen, bDirLen);
final int dirScoreLtr;
final int dirScoreRtl;
if (dirMax == 0) {
dirScoreLtr = 100;
dirScoreRtl = 100;
} else {
int dirSim = 0;
for (; dirSim < dirMin; dirSim++) {
if (a.charAt(dirSim) != b.charAt(dirSim))
break;
}
dirScoreLtr = (dirSim * 100) / dirMax;
if (dirScoreLtr == 100) {
dirScoreRtl = 100;
} else {
for (dirSim = 0; dirSim < dirMin; dirSim++) {
if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1
- dirSim))
break;
}
dirScoreRtl = (dirSim * 100) / dirMax;
}
}
int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen);
int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen);
int fileSim = 0;
for (; fileSim < fileMin; fileSim++) {
if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1
- fileSim))
break;
}
int fileScore = (fileSim * 100) / fileMax;
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
}
private SimilarityIndex hash(ObjectId objectId) throws IOException {
SimilarityIndex r = new SimilarityIndex();
r.hash(repo.openObject(objectId));
r.sort();
return r;
}
private long size(ObjectId objectId) throws IOException {
return repo.openObject(objectId).getSize();
}
private static int score(long value) {
return (int) (value >>> SCORE_SHIFT);
}
static int srcFile(long value) {
return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
}
static int dstFile(long value) {
return decodeFile(((int) value) & INDEX_MASK);
}
static long encode(int score, int srcIdx, int dstIdx) {
return (((long) score) << SCORE_SHIFT) //
| (encodeFile(srcIdx) << BITS_PER_INDEX) //
| encodeFile(dstIdx);
}
private static long encodeFile(int idx) {
// We invert the index so that the first file in the list sorts
// later in the table. This permits us to break ties favoring
// earlier names over later ones.
//
return INDEX_MASK - idx;
}
private static int decodeFile(int v) {
return INDEX_MASK - v;
}
private static boolean isFile(FileMode mode) {
return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE;
}
}

View File

@ -82,6 +82,21 @@ public static final AbbreviatedObjectId fromString(final byte[] buf,
return fromHexString(buf, offset, end);
}
/**
* Convert an AbbreviatedObjectId from an {@link AnyObjectId}.
* <p>
* This method copies over all bits of the Id, and is therefore complete
* (see {@link #isComplete()}).
*
* @param id
* the {@link ObjectId} to convert from.
* @return the converted object id.
*/
public static final AbbreviatedObjectId fromObjectId(AnyObjectId id) {
return new AbbreviatedObjectId(Constants.OBJECT_ID_STRING_LENGTH,
id.w1, id.w2, id.w3, id.w4, id.w5);
}
/**
* Convert an AbbreviatedObjectId from hex characters.
*

View File

@ -60,6 +60,7 @@
import java.util.List;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.EditList;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Constants;
@ -69,10 +70,7 @@
import org.eclipse.jgit.util.TemporaryBuffer;
/** Patch header describing an action for a single file path. */
public class FileHeader {
/** Magical file name used for file adds or deletes. */
public static final String DEV_NULL = "/dev/null";
public class FileHeader extends DiffEntry {
private static final byte[] OLD_MODE = encodeASCII("old mode ");
private static final byte[] NEW_MODE = encodeASCII("new mode ");
@ -103,24 +101,6 @@ public class FileHeader {
static final byte[] NEW_NAME = encodeASCII("+++ ");
/** General type of change a single file-level patch describes. */
public static enum ChangeType {
/** Add a new file to the project */
ADD,
/** Modify an existing file in the project (content and/or mode) */
MODIFY,
/** Delete an existing file from the project */
DELETE,
/** Rename an existing file to a new location */
RENAME,
/** Copy an existing file to a new location, keeping the original */
COPY;
}
/** Type of patch used by this file. */
public static enum PatchType {
/** A traditional unified diff style patch of a text file. */
@ -142,30 +122,6 @@ public static enum PatchType {
/** Position 1 past the end of this file within {@link #buf}. */
int endOffset;
/** File name of the old (pre-image). */
private String oldName;
/** File name of the new (post-image). */
private String newName;
/** Old mode of the file, if described by the patch, else null. */
private FileMode oldMode;
/** New mode of the file, if described by the patch, else null. */
protected FileMode newMode;
/** General type of change indicated by the patch. */
protected ChangeType changeType;
/** Similarity score if {@link #changeType} is a copy or rename. */
private int score;
/** ObjectId listed on the index line for the old (pre-image) */
private AbbreviatedObjectId oldId;
/** ObjectId listed on the index line for the new (post-image) */
protected AbbreviatedObjectId newId;
/** Type of patch used to modify this file */
PatchType patchType;
@ -178,6 +134,25 @@ public static enum PatchType {
/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
BinaryHunk reverseBinaryHunk;
/**
* Constructs a new FileHeader
*
* @param headerLines
* buffer holding the diff header for this file
* @param edits
* the edits for this file
* @param type
* the type of patch used to modify this file
*/
public FileHeader(final byte[] headerLines, EditList edits, PatchType type) {
this(headerLines, 0);
endOffset = headerLines.length;
int ptr = parseGitFileName(Patch.DIFF_GIT.length, headerLines.length);
ptr = parseGitHeaders(ptr, headerLines.length);
this.patchType = type;
addHunk(new HunkHeader(this, edits));
}
FileHeader(final byte[] b, final int offset) {
buf = b;
startOffset = offset;
@ -312,86 +287,6 @@ private String[] extractFileLines(final Charset[] csGuess) {
}
}
/**
* Get the old name associated with this file.
* <p>
* The meaning of the old name can differ depending on the semantic meaning
* of this patch:
* <ul>
* <li><i>file add</i>: always <code>/dev/null</code></li>
* <li><i>file modify</i>: always {@link #getNewName()}</li>
* <li><i>file delete</i>: always the file being deleted</li>
* <li><i>file copy</i>: source file the copy originates from</li>
* <li><i>file rename</i>: source file the rename originates from</li>
* </ul>
*
* @return old name for this file.
*/
public String getOldName() {
return oldName;
}
/**
* Get the new name associated with this file.
* <p>
* The meaning of the new name can differ depending on the semantic meaning
* of this patch:
* <ul>
* <li><i>file add</i>: always the file being created</li>
* <li><i>file modify</i>: always {@link #getOldName()}</li>
* <li><i>file delete</i>: always <code>/dev/null</code></li>
* <li><i>file copy</i>: destination file the copy ends up at</li>
* <li><i>file rename</i>: destination file the rename ends up at/li>
* </ul>
*
* @return new name for this file.
*/
public String getNewName() {
return newName;
}
/** @return the old file mode, if described in the patch */
public FileMode getOldMode() {
return oldMode;
}
/** @return the new file mode, if described in the patch */
public FileMode getNewMode() {
return newMode;
}
/** @return the type of change this patch makes on {@link #getNewName()} */
public ChangeType getChangeType() {
return changeType;
}
/**
* @return similarity score between {@link #getOldName()} and
* {@link #getNewName()} if {@link #getChangeType()} is
* {@link ChangeType#COPY} or {@link ChangeType#RENAME}.
*/
public int getScore() {
return score;
}
/**
* Get the old object id from the <code>index</code>.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getOldId() {
return oldId;
}
/**
* Get the new object id from the <code>index</code>.
*
* @return the object id; null if there is no index line
*/
public AbbreviatedObjectId getNewId() {
return newId;
}
/** @return style of patch used to modify this file */
public PatchType getPatchType() {
return patchType;

View File

@ -116,6 +116,8 @@ public int getLinesAdded() {
/** Total number of lines of context appearing in this hunk */
int nContext;
private EditList editList;
HunkHeader(final FileHeader fh, final int offset) {
this(fh, offset, new OldImage() {
@Override
@ -131,6 +133,21 @@ public AbbreviatedObjectId getId() {
old = oi;
}
HunkHeader(final FileHeader fh, final EditList editList) {
this(fh, fh.buf.length);
this.editList = editList;
endOffset = startOffset;
nContext = 0;
if (editList.isEmpty()) {
newStartLine = 0;
newLineCount = 0;
} else {
newStartLine = editList.get(0).getBeginB();
Edit last = editList.get(editList.size() - 1);
newLineCount = last.getEndB() - newStartLine;
}
}
/** @return header for the file this hunk applies to */
public FileHeader getFileHeader() {
return file;
@ -173,48 +190,50 @@ public int getLinesContext() {
/** @return a list describing the content edits performed within the hunk. */
public EditList toEditList() {
final EditList r = new EditList();
final byte[] buf = file.buf;
int c = nextLF(buf, startOffset);
int oLine = old.startLine;
int nLine = newStartLine;
Edit in = null;
if (editList == null) {
editList = new EditList();
final byte[] buf = file.buf;
int c = nextLF(buf, startOffset);
int oLine = old.startLine;
int nLine = newStartLine;
Edit in = null;
SCAN: for (; c < endOffset; c = nextLF(buf, c)) {
switch (buf[c]) {
case ' ':
case '\n':
in = null;
oLine++;
nLine++;
continue;
SCAN: for (; c < endOffset; c = nextLF(buf, c)) {
switch (buf[c]) {
case ' ':
case '\n':
in = null;
oLine++;
nLine++;
continue;
case '-':
if (in == null) {
in = new Edit(oLine - 1, nLine - 1);
r.add(in);
case '-':
if (in == null) {
in = new Edit(oLine - 1, nLine - 1);
editList.add(in);
}
oLine++;
in.extendA();
continue;
case '+':
if (in == null) {
in = new Edit(oLine - 1, nLine - 1);
editList.add(in);
}
nLine++;
in.extendB();
continue;
case '\\': // Matches "\ No newline at end of file"
continue;
default:
break SCAN;
}
oLine++;
in.extendA();
continue;
case '+':
if (in == null) {
in = new Edit(oLine - 1, nLine - 1);
r.add(in);
}
nLine++;
in.extendB();
continue;
case '\\': // Matches "\ No newline at end of file"
continue;
default:
break SCAN;
}
}
return r;
return editList;
}
void parseHeader() {

View File

@ -60,7 +60,7 @@
/** A parsed collection of {@link FileHeader}s from a unified diff patch file */
public class Patch {
private static final byte[] DIFF_GIT = encodeASCII("diff --git ");
static final byte[] DIFF_GIT = encodeASCII("diff --git ");
private static final byte[] DIFF_CC = encodeASCII("diff --cc ");

View File

@ -0,0 +1,120 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.revwalk;
import java.io.IOException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.PathFilter;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
/**
* Updates the internal path filter to follow copy/renames.
* <p>
* This is a special filter that performs {@code AND(path, ANY_DIFF)}, but also
* triggers rename detection so that the path node is updated to include a prior
* file name as the RevWalk traverses history.
* <p>
* Results with this filter are unpredictable if the path being followed is a
* subdirectory.
*/
public class FollowFilter extends TreeFilter {
/**
* Create a new tree filter for a user supplied path.
* <p>
* Path strings are relative to the root of the repository. If the user's
* input should be assumed relative to a subdirectory of the repository the
* caller must prepend the subdirectory's path prior to creating the filter.
* <p>
* Path strings use '/' to delimit directories on all platforms.
*
* @param path
* the path to filter on. Must not be the empty string. All
* trailing '/' characters will be trimmed before string's length
* is checked or is used as part of the constructed filter.
* @return a new filter for the requested path.
* @throws IllegalArgumentException
* the path supplied was the empty string.
*/
public static FollowFilter create(String path) {
return new FollowFilter(PathFilter.create(path));
}
private final PathFilter path;
FollowFilter(final PathFilter path) {
this.path = path;
}
/** @return the path this filter matches. */
public String getPath() {
return path.getPath();
}
@Override
public boolean include(final TreeWalk walker)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
return path.include(walker) && ANY_DIFF.include(walker);
}
@Override
public boolean shouldBeRecursive() {
return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive();
}
@Override
public TreeFilter clone() {
return new FollowFilter(path.clone());
}
@Override
public String toString() {
return "(FOLLOW(" + path.toString() + ")" //
+ " AND " //
+ ANY_DIFF.toString() + ")";
}
}

View File

@ -44,11 +44,17 @@
package org.eclipse.jgit.revwalk;
import java.io.IOException;
import java.util.List;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.RenameDetector;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.StopWalkException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
@ -76,8 +82,11 @@ class RewriteTreeFilter extends RevFilter {
private final TreeWalk pathFilter;
private final Repository repo;
RewriteTreeFilter(final RevWalk walker, final TreeFilter t) {
pathFilter = new TreeWalk(walker.db);
repo = walker.db;
pathFilter = new TreeWalk(repo);
pathFilter.setFilter(t);
pathFilter.setRecursive(t.shouldBeRecursive());
}
@ -128,6 +137,13 @@ public boolean include(final RevWalk walker, final RevCommit c)
// We have interesting items, but neither of the special
// cases denoted above.
//
if (adds > 0 && tw.getFilter() instanceof FollowFilter) {
// One of the paths we care about was added in this
// commit. We need to update our filter to its older
// name, if we can discover it. Find out what that is.
//
updateFollowFilter(trees);
}
return true;
}
} else if (nParents == 0) {
@ -213,4 +229,32 @@ public boolean include(final RevWalk walker, final RevCommit c)
c.flags |= REWRITE;
return false;
}
private void updateFollowFilter(ObjectId[] trees)
throws MissingObjectException, IncorrectObjectTypeException,
CorruptObjectException, IOException {
TreeWalk tw = pathFilter;
FollowFilter oldFilter = (FollowFilter) tw.getFilter();
tw.setFilter(TreeFilter.ANY_DIFF);
tw.reset(trees);
List<DiffEntry> files = DiffEntry.scan(tw);
RenameDetector rd = new RenameDetector(repo);
rd.addAll(files);
files = rd.compute();
TreeFilter newFilter = oldFilter;
for (DiffEntry ent : files) {
if (isRename(ent) && ent.getNewName().equals(oldFilter.getPath())) {
newFilter = FollowFilter.create(ent.getOldName());
break;
}
}
tw.setFilter(newFilter);
}
private static boolean isRename(DiffEntry ent) {
return ent.getChangeType() == ChangeType.RENAME
|| ent.getChangeType() == ChangeType.COPY;
}
}

View File

@ -90,6 +90,11 @@ private PathFilter(final String s) {
pathRaw = Constants.encode(pathStr);
}
/** @return the path this filter matches. */
public String getPath() {
return pathStr;
}
@Override
public boolean include(final TreeWalk walker) {
return walker.isPathPrefix(pathRaw, pathRaw.length) == 0;
@ -104,7 +109,7 @@ public boolean shouldBeRecursive() {
}
@Override
public TreeFilter clone() {
public PathFilter clone() {
return this;
}