Treat RawText of binary data as a file with a single line.

This avoids executing mergeAlgorithm.merge on binary data, which is
unlikely to be useful.

Arguably, binary data should not make it to
ResolveMerger#contentMerge, but this approach has the following
advantages:

* binary detection is exact, since it doesn't only look at the start
  of the blob.

* it is cheap, as we have to iterate over the bytes anyway to find
  '\n'.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: I424295df1dc60a719859d9d7c599067891b15792
This commit is contained in:
Han-Wen Nienhuys 2017-07-27 13:58:49 +02:00
parent ab0eedcead
commit a551b64694
2 changed files with 31 additions and 5 deletions

View File

@ -79,6 +79,13 @@ public void testTwoLineNoLF() throws UnsupportedEncodingException {
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
}
@Test
public void testBinary() throws UnsupportedEncodingException {
    // The first three bytes ("xxx") lie before the scanned region; the
    // region itself contains an LF followed later by a NUL byte.
    final byte[] data = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
    final int regionStart = 3;
    final int regionEnd = data.length;

    // Expected map: the leading Integer.MIN_VALUE entry, one line start at
    // the region start, and the region end. The LF must not open a second line.
    final int[] expected = { Integer.MIN_VALUE, regionStart, regionEnd };

    final IntList lines = RawParseUtils.lineMap(data, regionStart, regionEnd);

    assertArrayEquals(expected, asInts(lines));
}
@Test
public void testFourLineBlanks() throws UnsupportedEncodingException {
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");

View File

@ -618,6 +618,10 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
* <p>
* The last element (index <code>map.size()-1</code>) always contains
* <code>end</code>.
* <p>
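* If the data contains a '\0' anywhere between <code>ptr</code> and
* <code>end</code>, the whole region is considered binary and the returned
* line map describes a single line: it holds only
* <code>Integer.MIN_VALUE</code>, <code>ptr</code> and <code>end</code>.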
* </p>
*
* @param buf
* buffer to scan.
@ -629,14 +633,29 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
* @return a line map indexing the start position of each line.
*/
public static final IntList lineMap(final byte[] buf, int ptr, int end) {
    // ptr is advanced by the scan below; keep the original offset because
    // the binary fallback needs it as the start of the single line.
    final int start = ptr;

    // Experimentally derived from multiple source repositories
    // the average number of bytes/line is 36. It's a rough guess
    // to initially size our map close to the target.
    //
    IntList map = new IntList((end - ptr) / 36);

    // Index 0 always holds Integer.MIN_VALUE; real line starts follow it.
    map.add(Integer.MIN_VALUE);

    // True when the byte at ptr begins a new line, i.e. at the start of
    // the region and immediately after every '\n'.
    boolean foundLF = true;
    for (; ptr < end; ptr++) {
        if (foundLF) {
            map.add(ptr);
        }
        if (buf[ptr] == '\0') {
            // Binary data: throw away the line starts collected so far and
            // report the whole region as one line. The list needs exactly
            // three entries: MIN_VALUE, start, and end (added below).
            map = new IntList(3);
            map.add(Integer.MIN_VALUE);
            map.add(start);
            break;
        }
        foundLF = (buf[ptr] == '\n');
    }

    // The last element is always the end of the scanned region.
    map.add(end);
    return map;
}