Throw BinaryBlobException from RawParseUtils#lineMap.
This makes detection of binaries exact for ResolveMerger and DiffFormatter: they will classify a file as binary regardless of where the '\0' occurs in its content.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: Id4342a199628d9406bfa04af1b023c27a47d4014
This commit is contained in:
parent
ced658c445
commit
f2e64cd895
|
@ -64,6 +64,16 @@ public void testEmpty() {
|
||||||
assertEquals(0, r.size());
|
assertEquals(0, r.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBinary() {
|
||||||
|
String input = "foo-a\nf\0o-b\n";
|
||||||
|
byte[] data = Constants.encodeASCII(input);
|
||||||
|
final RawText a = new RawText(data);
|
||||||
|
assertEquals(a.content, data);
|
||||||
|
assertEquals(a.size(), 1);
|
||||||
|
assertEquals(a.getString(0, 1, false), input);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEquals() {
|
public void testEquals() {
|
||||||
final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n"));
|
final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n"));
|
||||||
|
|
|
@ -48,45 +48,45 @@
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
|
|
||||||
|
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class RawParseUtils_LineMapTest {
|
public class RawParseUtils_LineMapTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEmpty() {
|
public void testEmpty() throws Exception {
|
||||||
final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
|
final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
|
||||||
assertNotNull(map);
|
assertNotNull(map);
|
||||||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map));
|
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testOneBlankLine() {
|
public void testOneBlankLine() throws Exception {
|
||||||
final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
|
final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
|
||||||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map));
|
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTwoLineFooBar() throws UnsupportedEncodingException {
|
public void testTwoLineFooBar() throws Exception {
|
||||||
final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1");
|
final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1");
|
||||||
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
|
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
|
||||||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
|
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTwoLineNoLF() throws UnsupportedEncodingException {
|
public void testTwoLineNoLF() throws Exception {
|
||||||
final byte[] buf = "foo\nbar".getBytes("ISO-8859-1");
|
final byte[] buf = "foo\nbar".getBytes("ISO-8859-1");
|
||||||
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
|
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
|
||||||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
|
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(expected = BinaryBlobException.class)
|
||||||
public void testBinary() throws UnsupportedEncodingException {
|
public void testBinary() throws Exception {
|
||||||
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
|
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
|
||||||
final IntList map = RawParseUtils.lineMap(buf, 3, buf.length);
|
RawParseUtils.lineMap(buf, 3, buf.length);
|
||||||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFourLineBlanks() throws UnsupportedEncodingException {
|
public void testFourLineBlanks() throws Exception {
|
||||||
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
|
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
|
||||||
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
|
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
|
||||||
|
|
||||||
|
|
|
@ -93,7 +93,29 @@ public class RawText extends Sequence {
|
||||||
*/
|
*/
|
||||||
public RawText(final byte[] input) {
|
public RawText(final byte[] input) {
|
||||||
content = input;
|
content = input;
|
||||||
lines = RawParseUtils.lineMap(content, 0, content.length);
|
IntList map;
|
||||||
|
try {
|
||||||
|
map = RawParseUtils.lineMap(content, 0, content.length);
|
||||||
|
} catch (BinaryBlobException e) {
|
||||||
|
map = new IntList(3);
|
||||||
|
map.add(Integer.MIN_VALUE);
|
||||||
|
map.add(0);
|
||||||
|
map.add(content.length);
|
||||||
|
}
|
||||||
|
lines = map;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a new RawText if the line map is already known.
|
||||||
|
*
|
||||||
|
* @param data
|
||||||
|
* the blob data.
|
||||||
|
* @param lineMap
|
||||||
|
* Indices of line starts, indexed by 1-based line number.
|
||||||
|
*/
|
||||||
|
private RawText(final byte[] data, final IntList lineMap) {
|
||||||
|
content = data;
|
||||||
|
lines = lineMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -357,7 +379,8 @@ public static RawText load(ObjectLoader ldr, int threshold) throws IOException,
|
||||||
|
|
||||||
System.arraycopy(head, 0, data, 0, head.length);
|
System.arraycopy(head, 0, data, 0, head.length);
|
||||||
IO.readFully(stream, data, off, (int) (sz-off));
|
IO.readFully(stream, data, off, (int) (sz-off));
|
||||||
return new RawText(data);
|
IntList lineMap = RawParseUtils.lineMap(data, 0, data.length);
|
||||||
|
return new RawText(data, lineMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,6 +63,7 @@
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.eclipse.jgit.annotations.Nullable;
|
import org.eclipse.jgit.annotations.Nullable;
|
||||||
|
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||||
import org.eclipse.jgit.lib.Constants;
|
import org.eclipse.jgit.lib.Constants;
|
||||||
import org.eclipse.jgit.lib.PersonIdent;
|
import org.eclipse.jgit.lib.PersonIdent;
|
||||||
|
|
||||||
|
@ -618,9 +619,6 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
|
||||||
* <p>
|
* <p>
|
||||||
* The last element (index <code>map.size()-1</code>) always contains
|
* The last element (index <code>map.size()-1</code>) always contains
|
||||||
* <code>end</code>.
|
* <code>end</code>.
|
||||||
* <p>
|
|
||||||
* If the data contains a '\0' anywhere, the whole region is considered binary
|
|
||||||
* and a LineMap corresponding to a single line is returned.
|
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param buf
|
* @param buf
|
||||||
|
@ -631,10 +629,9 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
|
||||||
* @param end
|
* @param end
|
||||||
* 1 past the end of the content within <code>buf</code>.
|
* 1 past the end of the content within <code>buf</code>.
|
||||||
* @return a line map indexing the start position of each line.
|
* @return a line map indexing the start position of each line.
|
||||||
|
* @throws BinaryBlobException if any '\0' is found.
|
||||||
*/
|
*/
|
||||||
public static final IntList lineMap(final byte[] buf, int ptr, int end) {
|
public static final IntList lineMap(final byte[] buf, int ptr, int end) throws BinaryBlobException {
|
||||||
int start = ptr;
|
|
||||||
|
|
||||||
// Experimentally derived from multiple source repositories
|
// Experimentally derived from multiple source repositories
|
||||||
// the average number of bytes/line is 36. It's a rough guess
|
// the average number of bytes/line is 36. It's a rough guess
|
||||||
// to initially size our map close to the target.
|
// to initially size our map close to the target.
|
||||||
|
@ -647,11 +644,7 @@ public static final IntList lineMap(final byte[] buf, int ptr, int end) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buf[ptr] == '\0') {
|
if (buf[ptr] == '\0') {
|
||||||
// binary data.
|
throw new BinaryBlobException();
|
||||||
map = new IntList(3);
|
|
||||||
map.add(Integer.MIN_VALUE);
|
|
||||||
map.add(start);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
foundLF = (buf[ptr] == '\n');
|
foundLF = (buf[ptr] == '\n');
|
||||||
|
|
Loading…
Reference in New Issue