diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java index ad14b0125..48a062df8 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java @@ -18,8 +18,10 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.Arrays; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.util.RawParseUtils; @@ -244,6 +246,38 @@ public void testLineDelimiter2() throws Exception { assertTrue(rt.isMissingNewlineAtEnd()); } + @Test + public void testCrAtLimit() throws Exception { + int limit = RawText.getBufferSize(); + byte[] data = new byte[RawText.getBufferSize() + 2]; + data[0] = 'A'; + for (int i = 1; i < limit - 1; i++) { + if (i % 7 == 0) { + data[i] = '\n'; + } else { + data[i] = (byte) ('A' + i % 7); + } + } + data[limit - 1] = '\r'; + data[limit] = '\n'; + data[limit + 1] = 'A'; + assertTrue(RawText.isBinary(data, limit, true)); + assertFalse(RawText.isBinary(data, limit, false)); + assertFalse(RawText.isBinary(data, data.length, true)); + byte[] buf = Arrays.copyOf(data, limit); + try (ByteArrayInputStream in = new ByteArrayInputStream(buf)) { + assertTrue(RawText.isBinary(in)); + } + byte[] buf2 = Arrays.copyOf(data, limit + 1); + try (ByteArrayInputStream in = new ByteArrayInputStream(buf2)) { + assertFalse(RawText.isBinary(in)); + } + byte[] buf3 = Arrays.copyOf(data, limit + 2); + try (ByteArrayInputStream in = new ByteArrayInputStream(buf3)) { + assertFalse(RawText.isBinary(in)); + } + } + private static RawText t(String text) { StringBuilder r = new StringBuilder(); for (int i = 0; i < text.length(); i++) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 19961a13e..b52803513 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -288,12 +288,13 @@ public static int setBufferSize(int bufferSize) { * if input stream could not be read */ public static boolean isBinary(InputStream raw) throws IOException { - final byte[] buffer = new byte[getBufferSize()]; + final byte[] buffer = new byte[getBufferSize() + 1]; int cnt = 0; while (cnt < buffer.length) { final int n = raw.read(buffer, cnt, buffer.length - cnt); - if (n == -1) + if (n == -1) { break; + } cnt += n; } return isBinary(buffer, cnt, cnt < buffer.length); @@ -347,8 +348,16 @@ public static boolean isBinary(byte[] raw, int length, boolean complete) { // - limited buffer size; may be only the beginning of a large blob // - no counting of printable vs. non-printable bytes < 0x20 and 0x7F int maxLength = getBufferSize(); + boolean isComplete = complete; if (length > maxLength) { + // We restrict the length in all cases to getBufferSize() to get + // predictable behavior. Sometimes we load streams, and sometimes we + // have the full data in memory. With streams, we never look at more + // than the first getBufferSize() bytes. If we looked at more when + // we have the full data, different code paths in JGit might come to + // different conclusions. length = maxLength; + isComplete = false; } byte last = 'x'; // Just something inconspicuous. for (int ptr = 0; ptr < length; ptr++) { @@ -358,7 +367,7 @@ public static boolean isBinary(byte[] raw, int length, boolean complete) { } last = curr; } - if (complete) { + if (isComplete) { // Buffer contains everything... return last == '\r'; // ... so this must be a lone CR }