Merge "Skip detecting content renames for binary files"
This commit is contained in:
commit
6dc3506b52
|
@ -579,6 +579,57 @@ public void testInexactRename_LargeFile() throws Exception {
|
||||||
assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
|
assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExactRenameForBinaryFile_isIdentified() throws Exception {
|
||||||
|
ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
|
||||||
|
|
||||||
|
DiffEntry a = DiffEntry.add(PATH_A, aId);
|
||||||
|
DiffEntry b = DiffEntry.delete(PATH_Q, aId);
|
||||||
|
|
||||||
|
rd.add(a);
|
||||||
|
rd.add(b);
|
||||||
|
|
||||||
|
List<DiffEntry> entries = rd.compute();
|
||||||
|
assertEquals(1, entries.size());
|
||||||
|
assertRename(b, a, 100, entries.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInexactRenameForBinaryFile_identifiedByDefault() throws Exception {
|
||||||
|
ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
|
||||||
|
ObjectId bId = blob("a\nb\nc\n\0\0\0d\n");
|
||||||
|
|
||||||
|
DiffEntry a = DiffEntry.add(PATH_A, aId);
|
||||||
|
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
|
||||||
|
|
||||||
|
rd.add(a);
|
||||||
|
rd.add(b);
|
||||||
|
rd.setRenameScore(40);
|
||||||
|
|
||||||
|
List<DiffEntry> entries = rd.compute();
|
||||||
|
assertEquals(1, entries.size());
|
||||||
|
assertRename(b, a, 50, entries.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInexactRenameForBinaryFile_notIdentifiedIfSkipParameterSet() throws Exception {
|
||||||
|
ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
|
||||||
|
ObjectId bId = blob("a\nb\nc\n\0\0\0d\n");
|
||||||
|
|
||||||
|
DiffEntry a = DiffEntry.add(PATH_A, aId);
|
||||||
|
DiffEntry b = DiffEntry.delete(PATH_Q, bId);
|
||||||
|
|
||||||
|
rd.add(a);
|
||||||
|
rd.add(b);
|
||||||
|
rd.setRenameScore(40);
|
||||||
|
rd.setSkipContentRenamesForBinaryFiles(true);
|
||||||
|
|
||||||
|
List<DiffEntry> entries = rd.compute();
|
||||||
|
assertEquals(2, entries.size());
|
||||||
|
assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0));
|
||||||
|
assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSetRenameScore_IllegalArgs() throws Exception {
|
public void testSetRenameScore_IllegalArgs() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -104,6 +104,13 @@ private int sortOf(ChangeType changeType) {
|
||||||
*/
|
*/
|
||||||
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
|
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skip detecting content renames for binary files. Content renames are
|
||||||
|
* those that are not exact, that is with a slight content modification
|
||||||
|
* between the two files.
|
||||||
|
*/
|
||||||
|
private boolean skipContentRenamesForBinaryFiles = false;
|
||||||
|
|
||||||
/** Set if the number of adds or deletes was over the limit. */
|
/** Set if the number of adds or deletes was over the limit. */
|
||||||
private boolean overRenameLimit;
|
private boolean overRenameLimit;
|
||||||
|
|
||||||
|
@ -235,6 +242,26 @@ public void setBigFileThreshold(int threshold) {
|
||||||
this.bigFileThreshold = threshold;
|
this.bigFileThreshold = threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get skipping detecting content renames for binary files.
|
||||||
|
*
|
||||||
|
* @return true if content renames should be skipped for binary files, false otherwise.
|
||||||
|
* @since 5.12
|
||||||
|
*/
|
||||||
|
public boolean getSkipContentRenamesForBinaryFiles() {
|
||||||
|
return skipContentRenamesForBinaryFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets skipping detecting content renames for binary files.
|
||||||
|
*
|
||||||
|
* @param value true if content renames should be skipped for binary files, false otherwise.
|
||||||
|
* @since 5.12
|
||||||
|
*/
|
||||||
|
public void setSkipContentRenamesForBinaryFiles(boolean value) {
|
||||||
|
this.skipContentRenamesForBinaryFiles = value;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the detector is over the rename limit.
|
* Check if the detector is over the rename limit.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -521,6 +548,7 @@ private void findContentRenames(ContentSource.Pair reader,
|
||||||
d = new SimilarityRenameDetector(reader, deleted, added);
|
d = new SimilarityRenameDetector(reader, deleted, added);
|
||||||
d.setRenameScore(getRenameScore());
|
d.setRenameScore(getRenameScore());
|
||||||
d.setBigFileThreshold(getBigFileThreshold());
|
d.setBigFileThreshold(getBigFileThreshold());
|
||||||
|
d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles());
|
||||||
d.compute(pm);
|
d.compute(pm);
|
||||||
overRenameLimit |= d.isTableOverflow();
|
overRenameLimit |= d.isTableOverflow();
|
||||||
deleted = d.getLeftOverSources();
|
deleted = d.getLeftOverSources();
|
||||||
|
|
|
@ -102,6 +102,15 @@ public static SimilarityIndex create(ObjectLoader obj) throws IOException,
|
||||||
idGrowAt = growAt(idHashBits);
|
idGrowAt = growAt(idHashBits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static boolean isBinary(ObjectLoader obj) throws IOException {
|
||||||
|
if (obj.isLarge()) {
|
||||||
|
try (ObjectStream in1 = obj.openStream()) {
|
||||||
|
return RawText.isBinary(in1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return RawText.isBinary(obj.getCachedBytes());
|
||||||
|
}
|
||||||
|
|
||||||
void hash(ObjectLoader obj) throws MissingObjectException, IOException,
|
void hash(ObjectLoader obj) throws MissingObjectException, IOException,
|
||||||
TableFullException {
|
TableFullException {
|
||||||
if (obj.isLarge()) {
|
if (obj.isLarge()) {
|
||||||
|
@ -115,9 +124,7 @@ void hash(ObjectLoader obj) throws MissingObjectException, IOException,
|
||||||
private void hashLargeObject(ObjectLoader obj) throws IOException,
|
private void hashLargeObject(ObjectLoader obj) throws IOException,
|
||||||
TableFullException {
|
TableFullException {
|
||||||
boolean text;
|
boolean text;
|
||||||
try (ObjectStream in1 = obj.openStream()) {
|
text = !isBinary(obj);
|
||||||
text = !RawText.isBinary(in1);
|
|
||||||
}
|
|
||||||
|
|
||||||
try (ObjectStream in2 = obj.openStream()) {
|
try (ObjectStream in2 = obj.openStream()) {
|
||||||
hash(in2, in2.getSize(), text);
|
hash(in2, in2.getSize(), text);
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
import org.eclipse.jgit.internal.JGitText;
|
import org.eclipse.jgit.internal.JGitText;
|
||||||
import org.eclipse.jgit.lib.FileMode;
|
import org.eclipse.jgit.lib.FileMode;
|
||||||
import org.eclipse.jgit.lib.NullProgressMonitor;
|
import org.eclipse.jgit.lib.NullProgressMonitor;
|
||||||
|
import org.eclipse.jgit.lib.ObjectLoader;
|
||||||
import org.eclipse.jgit.lib.ProgressMonitor;
|
import org.eclipse.jgit.lib.ProgressMonitor;
|
||||||
|
|
||||||
class SimilarityRenameDetector {
|
class SimilarityRenameDetector {
|
||||||
|
@ -87,6 +88,9 @@ class SimilarityRenameDetector {
|
||||||
*/
|
*/
|
||||||
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
|
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
|
||||||
|
|
||||||
|
/** Skip content renames for binary files. */
|
||||||
|
private boolean skipBinaryFiles = false;
|
||||||
|
|
||||||
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
|
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
|
||||||
private boolean tableOverflow;
|
private boolean tableOverflow;
|
||||||
|
|
||||||
|
@ -107,6 +111,10 @@ void setBigFileThreshold(int threshold) {
|
||||||
bigFileThreshold = threshold;
|
bigFileThreshold = threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setSkipBinaryFiles(boolean value) {
|
||||||
|
skipBinaryFiles = value;
|
||||||
|
}
|
||||||
|
|
||||||
void compute(ProgressMonitor pm) throws IOException, CancelledException {
|
void compute(ProgressMonitor pm) throws IOException, CancelledException {
|
||||||
if (pm == null)
|
if (pm == null)
|
||||||
pm = NullProgressMonitor.INSTANCE;
|
pm = NullProgressMonitor.INSTANCE;
|
||||||
|
@ -271,7 +279,12 @@ private int buildMatrix(ProgressMonitor pm)
|
||||||
|
|
||||||
if (s == null) {
|
if (s == null) {
|
||||||
try {
|
try {
|
||||||
s = hash(OLD, srcEnt);
|
ObjectLoader loader = reader.open(OLD, srcEnt);
|
||||||
|
if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
|
||||||
|
pm.update(1);
|
||||||
|
continue SRC;
|
||||||
|
}
|
||||||
|
s = hash(loader);
|
||||||
} catch (TableFullException tableFull) {
|
} catch (TableFullException tableFull) {
|
||||||
tableOverflow = true;
|
tableOverflow = true;
|
||||||
continue SRC;
|
continue SRC;
|
||||||
|
@ -280,7 +293,12 @@ private int buildMatrix(ProgressMonitor pm)
|
||||||
|
|
||||||
SimilarityIndex d;
|
SimilarityIndex d;
|
||||||
try {
|
try {
|
||||||
d = hash(NEW, dstEnt);
|
ObjectLoader loader = reader.open(NEW, dstEnt);
|
||||||
|
if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
|
||||||
|
pm.update(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
d = hash(loader);
|
||||||
} catch (TableFullException tableFull) {
|
} catch (TableFullException tableFull) {
|
||||||
if (dstTooLarge == null)
|
if (dstTooLarge == null)
|
||||||
dstTooLarge = new BitSet(dsts.size());
|
dstTooLarge = new BitSet(dsts.size());
|
||||||
|
@ -364,10 +382,10 @@ static int nameScore(String a, String b) {
|
||||||
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
|
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
|
private SimilarityIndex hash(ObjectLoader objectLoader)
|
||||||
throws IOException, TableFullException {
|
throws IOException, TableFullException {
|
||||||
SimilarityIndex r = new SimilarityIndex();
|
SimilarityIndex r = new SimilarityIndex();
|
||||||
r.hash(reader.open(side, ent));
|
r.hash(objectLoader);
|
||||||
r.sort();
|
r.sort();
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue