RawText.isBinary(): handle complete buffer correctly

Make sure we always get consistent results, whether or not we have the
full data in the buffer.

Change-Id: Ieb379a0c375ad3dd352e63ac2f23bda6ef16c215
Signed-off-by: Thomas Wolf <twolf@apache.org>
This commit is contained in:
Thomas Wolf 2022-10-31 22:44:50 +01:00 committed by Matthias Sohn
parent 0fb9d26eff
commit 1c886d92f6
2 changed files with 46 additions and 3 deletions

View File

@ -18,8 +18,10 @@
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.util.RawParseUtils;
@ -244,6 +246,38 @@ public void testLineDelimiter2() throws Exception {
assertTrue(rt.isMissingNewlineAtEnd());
}
@Test
public void testCrAtLimit() throws Exception {
	// Text-like content that is two bytes longer than the buffer size used
	// by RawText, with a CR as the last byte inside that size and the
	// matching LF as the first byte beyond it.
	final int window = RawText.getBufferSize();
	final byte[] content = new byte[window + 2];
	content[0] = 'A';
	for (int pos = 1; pos < window - 1; pos++) {
		// A line feed at every seventh position, letters everywhere else.
		content[pos] = (pos % 7 == 0) ? (byte) '\n'
				: (byte) ('A' + pos % 7);
	}
	content[window - 1] = '\r';
	content[window] = '\n';
	content[window + 1] = 'A';

	// Array variant: only a complete buffer that ends with the CR is
	// expected to be reported as binary.
	assertTrue(RawText.isBinary(content, window, true));
	assertFalse(RawText.isBinary(content, window, false));
	assertFalse(RawText.isBinary(content, content.length, true));

	// Stream variant: feed prefixes that end at the CR, at the LF, and at
	// the trailing 'A'. Only the prefix ending at the CR is expected to be
	// binary.
	for (int extra = 0; extra <= 2; extra++) {
		final byte[] prefix = Arrays.copyOf(content, window + extra);
		try (ByteArrayInputStream in = new ByteArrayInputStream(prefix)) {
			final boolean binary = RawText.isBinary(in);
			if (extra == 0) {
				assertTrue(binary);
			} else {
				assertFalse(binary);
			}
		}
	}
}
private static RawText t(String text) {
StringBuilder r = new StringBuilder();
for (int i = 0; i < text.length(); i++) {

View File

@ -288,12 +288,13 @@ public static int setBufferSize(int bufferSize) {
* if input stream could not be read
*/
public static boolean isBinary(InputStream raw) throws IOException {
final byte[] buffer = new byte[getBufferSize()];
final byte[] buffer = new byte[getBufferSize() + 1];
int cnt = 0;
while (cnt < buffer.length) {
final int n = raw.read(buffer, cnt, buffer.length - cnt);
if (n == -1)
if (n == -1) {
break;
}
cnt += n;
}
return isBinary(buffer, cnt, cnt < buffer.length);
@ -347,8 +348,16 @@ public static boolean isBinary(byte[] raw, int length, boolean complete) {
// - limited buffer size; may be only the beginning of a large blob
// - no counting of printable vs. non-printable bytes < 0x20 and 0x7F
int maxLength = getBufferSize();
boolean isComplete = complete;
if (length > maxLength) {
// We restrict the length in all cases to getBufferSize() to get
// predictable behavior. Sometimes we load streams, and sometimes we
// have the full data in memory. With streams, we never look at more
// than the first getBufferSize() bytes. If we looked at more when
// we have the full data, different code paths in JGit might come to
// different conclusions.
length = maxLength;
isComplete = false;
}
byte last = 'x'; // Just something inconspicuous.
for (int ptr = 0; ptr < length; ptr++) {
@ -358,7 +367,7 @@ public static boolean isBinary(byte[] raw, int length, boolean complete) {
}
last = curr;
}
if (complete) {
if (isComplete) {
// Buffer contains everything...
return last == '\r'; // ... so this must be a lone CR
}