From 1788b72d1af819dee6f371af9e1b0667f0ed8a64 Mon Sep 17 00:00:00 2001 From: Youssef Elghareeb Date: Wed, 12 May 2021 15:35:45 +0200 Subject: [PATCH] Skip detecting content renames for binary files This is similar to change Idbc2c29bd that skipped detecting content renames for large files. With this change, we added a new option in RenameDetector called "skipContentRenamesForBinaryFiles", that when set, causes binary files with any slight modification to be identified as added/deleted. The default for this boolean is false, so preserving current behaviour. Change-Id: I4770b1f69c60b1037025ddd0940ba86df6047299 --- .../eclipse/jgit/diff/RenameDetectorTest.java | 51 +++++++++++++++++++ .../org/eclipse/jgit/diff/RenameDetector.java | 28 ++++++++++ .../eclipse/jgit/diff/SimilarityIndex.java | 13 +++-- .../jgit/diff/SimilarityRenameDetector.java | 26 ++++++++-- 4 files changed, 111 insertions(+), 7 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java index 2ea3cd7eb..5edb60ce3 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -579,6 +579,57 @@ public void testInexactRename_LargeFile() throws Exception { assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1)); } + @Test + public void testExactRenameForBinaryFile_isIdentified() throws Exception { + ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, aId); + + rd.add(a); + rd.add(b); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 100, entries.get(0)); + } + + @Test + public void testInexactRenameForBinaryFile_identifiedByDefault() throws Exception { + ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n"); + ObjectId bId = blob("a\nb\nc\n\0\0\0d\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + rd.setRenameScore(40); + + List entries = rd.compute(); + assertEquals(1, entries.size()); + assertRename(b, a, 50, entries.get(0)); + } + + @Test + public void testInexactRenameForBinaryFile_notIdentifiedIfSkipParameterSet() throws Exception { + ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n"); + ObjectId bId = blob("a\nb\nc\n\0\0\0d\n"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + rd.add(a); + rd.add(b); + rd.setRenameScore(40); + rd.setSkipContentRenamesForBinaryFiles(true); + + List entries = rd.compute(); + assertEquals(2, entries.size()); + assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0)); + assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1)); + } + @Test public void testSetRenameScore_IllegalArgs() throws Exception { try { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 75784c255..ba1f63b68 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -104,6 +104,13 @@ private int sortOf(ChangeType changeType) { */ private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD; + /** + * Skip detecting content renames for binary files. Content renames are + * those that are not exact, that is with a slight content modification + * between the two files. + */ + private boolean skipContentRenamesForBinaryFiles = false; + /** Set if the number of adds or deletes was over the limit. */ private boolean overRenameLimit; @@ -235,6 +242,26 @@ public void setBigFileThreshold(int threshold) { this.bigFileThreshold = threshold; } + /** + * Get skipping detecting content renames for binary files. + * + * @return true if content renames should be skipped for binary files, false otherwise. + * @since 5.12 + */ + public boolean getSkipContentRenamesForBinaryFiles() { + return skipContentRenamesForBinaryFiles; + } + + /** + * Sets skipping detecting content renames for binary files. + * + * @param value true if content renames should be skipped for binary files, false otherwise. + * @since 5.12 + */ + public void setSkipContentRenamesForBinaryFiles(boolean value) { + this.skipContentRenamesForBinaryFiles = value; + } + /** * Check if the detector is over the rename limit. *

@@ -521,6 +548,7 @@ private void findContentRenames(ContentSource.Pair reader, d = new SimilarityRenameDetector(reader, deleted, added); d.setRenameScore(getRenameScore()); d.setBigFileThreshold(getBigFileThreshold()); + d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles()); d.compute(pm); overRenameLimit |= d.isTableOverflow(); deleted = d.getLeftOverSources(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java index fb6e5df58..661369b86 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -102,6 +102,15 @@ public static SimilarityIndex create(ObjectLoader obj) throws IOException, idGrowAt = growAt(idHashBits); } + static boolean isBinary(ObjectLoader obj) throws IOException { + if (obj.isLarge()) { + try (ObjectStream in1 = obj.openStream()) { + return RawText.isBinary(in1); + } + } + return RawText.isBinary(obj.getCachedBytes()); + } + void hash(ObjectLoader obj) throws MissingObjectException, IOException, TableFullException { if (obj.isLarge()) { @@ -115,9 +124,7 @@ void hash(ObjectLoader obj) throws MissingObjectException, IOException, private void hashLargeObject(ObjectLoader obj) throws IOException, TableFullException { boolean text; - try (ObjectStream in1 = obj.openStream()) { - text = !RawText.isBinary(in1); - } + text = !isBinary(obj); try (ObjectStream in2 = obj.openStream()) { hash(in2, in2.getSize(), text); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java index 082f31d17..5871b4aee 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java @@ -26,6 +26,7 @@ import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ProgressMonitor; class SimilarityRenameDetector { @@ -87,6 +88,9 @@ class SimilarityRenameDetector { */ private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD; + /** Skip content renames for binary files. */ + private boolean skipBinaryFiles = false; + /** Set if any {@link SimilarityIndex.TableFullException} occurs. */ private boolean tableOverflow; @@ -107,6 +111,10 @@ void setBigFileThreshold(int threshold) { bigFileThreshold = threshold; } + void setSkipBinaryFiles(boolean value) { + skipBinaryFiles = value; + } + void compute(ProgressMonitor pm) throws IOException, CancelledException { if (pm == null) pm = NullProgressMonitor.INSTANCE; @@ -271,7 +279,12 @@ private int buildMatrix(ProgressMonitor pm) if (s == null) { try { - s = hash(OLD, srcEnt); + ObjectLoader loader = reader.open(OLD, srcEnt); + if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) { + pm.update(1); + continue SRC; + } + s = hash(loader); } catch (TableFullException tableFull) { tableOverflow = true; continue SRC; @@ -280,7 +293,12 @@ private int buildMatrix(ProgressMonitor pm) SimilarityIndex d; try { - d = hash(NEW, dstEnt); + ObjectLoader loader = reader.open(NEW, dstEnt); + if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) { + pm.update(1); + continue; + } + d = hash(loader); } catch (TableFullException tableFull) { if (dstTooLarge == null) dstTooLarge = new BitSet(dsts.size()); @@ -364,10 +382,10 @@ static int nameScore(String a, String b) { return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100; } - private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent) + private SimilarityIndex hash(ObjectLoader objectLoader) throws IOException, TableFullException { SimilarityIndex r = new SimilarityIndex(); - r.hash(reader.open(side, ent)); + r.hash(objectLoader); r.sort(); return r; }