RawText.isBinary(): handle complete buffer correctly

Make sure we always get consistent results, whether or not we have the
full data in the buffer.

Change-Id: Ieb379a0c375ad3dd352e63ac2f23bda6ef16c215
Signed-off-by: Thomas Wolf <twolf@apache.org>
This commit is contained in:
Thomas Wolf 2022-10-31 22:44:50 +01:00 committed by Matthias Sohn
parent 0fb9d26eff
commit 1c886d92f6
2 changed files with 46 additions and 3 deletions

View File

@ -18,8 +18,10 @@
import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.util.RawParseUtils; import org.eclipse.jgit.util.RawParseUtils;
@ -244,6 +246,38 @@ public void testLineDelimiter2() throws Exception {
assertTrue(rt.isMissingNewlineAtEnd()); assertTrue(rt.isMissingNewlineAtEnd());
} }
/**
 * Checks binary detection when a CR is the last byte that still fits
 * within the buffer size and the LF that follows it lies just beyond.
 */
@Test
public void testCrAtLimit() throws Exception {
	final int limit = RawText.getBufferSize();
	final byte[] data = new byte[RawText.getBufferSize() + 2];

	// Plain text up to the CR: a line feed at every 7th index, letters
	// everywhere else.
	data[0] = 'A';
	for (int pos = 1; pos < limit - 1; pos++) {
		final int phase = pos % 7;
		data[pos] = phase == 0 ? (byte) '\n' : (byte) ('A' + phase);
	}
	// CR as the last byte inside the limit, then LF and one more letter.
	data[limit - 1] = '\r';
	data[limit] = '\n';
	data[limit + 1] = 'A';

	// Exactly 'limit' bytes, declared complete: the trailing CR is a lone
	// CR, so the data is expected to be reported as binary.
	assertTrue(RawText.isBinary(data, limit, true));
	// The same bytes, declared incomplete: expected not binary.
	assertFalse(RawText.isBinary(data, limit, false));
	// The whole array, which is longer than the limit: expected not binary
	// even though it is declared complete.
	assertFalse(RawText.isBinary(data, data.length, true));

	// The stream variant is expected to agree: binary only when the stream
	// ends right after the CR (extra == 0).
	for (int extra = 0; extra <= 2; extra++) {
		final byte[] prefix = Arrays.copyOf(data, limit + extra);
		try (ByteArrayInputStream stream = new ByteArrayInputStream(prefix)) {
			final boolean binary = RawText.isBinary(stream);
			if (extra == 0) {
				assertTrue(binary);
			} else {
				assertFalse(binary);
			}
		}
	}
}
private static RawText t(String text) { private static RawText t(String text) {
StringBuilder r = new StringBuilder(); StringBuilder r = new StringBuilder();
for (int i = 0; i < text.length(); i++) { for (int i = 0; i < text.length(); i++) {

View File

@ -288,12 +288,13 @@ public static int setBufferSize(int bufferSize) {
* if input stream could not be read * if input stream could not be read
*/ */
public static boolean isBinary(InputStream raw) throws IOException { public static boolean isBinary(InputStream raw) throws IOException {
final byte[] buffer = new byte[getBufferSize()]; final byte[] buffer = new byte[getBufferSize() + 1];
int cnt = 0; int cnt = 0;
while (cnt < buffer.length) { while (cnt < buffer.length) {
final int n = raw.read(buffer, cnt, buffer.length - cnt); final int n = raw.read(buffer, cnt, buffer.length - cnt);
if (n == -1) if (n == -1) {
break; break;
}
cnt += n; cnt += n;
} }
return isBinary(buffer, cnt, cnt < buffer.length); return isBinary(buffer, cnt, cnt < buffer.length);
@ -347,8 +348,16 @@ public static boolean isBinary(byte[] raw, int length, boolean complete) {
// - limited buffer size; may be only the beginning of a large blob // - limited buffer size; may be only the beginning of a large blob
// - no counting of printable vs. non-printable bytes < 0x20 and 0x7F // - no counting of printable vs. non-printable bytes < 0x20 and 0x7F
int maxLength = getBufferSize(); int maxLength = getBufferSize();
boolean isComplete = complete;
if (length > maxLength) { if (length > maxLength) {
// We restrict the length in all cases to getBufferSize() to get
// predictable behavior. Sometimes we load streams, and sometimes we
// have the full data in memory. With streams, we never look at more
// than the first getBufferSize() bytes. If we looked at more when
// we have the full data, different code paths in JGit might come to
// different conclusions.
length = maxLength; length = maxLength;
isComplete = false;
} }
byte last = 'x'; // Just something inconspicuous. byte last = 'x'; // Just something inconspicuous.
for (int ptr = 0; ptr < length; ptr++) { for (int ptr = 0; ptr < length; ptr++) {
@ -358,7 +367,7 @@ public static boolean isBinary(byte[] raw, int length, boolean complete) {
} }
last = curr; last = curr;
} }
if (complete) { if (isComplete) {
// Buffer contains everything... // Buffer contains everything...
return last == '\r'; // ... so this must be a lone CR return last == '\r'; // ... so this must be a lone CR
} }