Treat RawText of binary data as a file with a single line.
This avoids executing mergeAlgorithm.merge on binary data, which is unlikely to be useful. Arguably, binary data should not make it to ResolveMerger#contentMerge, but this approach has the following advantages:

* Binary detection is exact, since it doesn't look only at the start of the blob.
* It is cheap, as we have to iterate over the bytes anyway to find '\n'.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: I424295df1dc60a719859d9d7c599067891b15792
This commit is contained in:
parent
ab0eedcead
commit
a551b64694
|
@ -79,6 +79,13 @@ public void testTwoLineNoLF() throws UnsupportedEncodingException {
|
|||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBinary() throws UnsupportedEncodingException {
|
||||
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
|
||||
final IntList map = RawParseUtils.lineMap(buf, 3, buf.length);
|
||||
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFourLineBlanks() throws UnsupportedEncodingException {
|
||||
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
|
||||
|
|
|
@ -618,6 +618,10 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
|
|||
* <p>
|
||||
* The last element (index <code>map.size()-1</code>) always contains
|
||||
* <code>end</code>.
|
||||
* <p>
|
||||
* If the data contains a '\0' anywhere, the whole region is considered binary
|
||||
* and a LineMap corresponding to a single line is returned.
|
||||
* </p>
|
||||
*
|
||||
* @param buf
|
||||
* buffer to scan.
|
||||
|
@ -629,14 +633,29 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
|
|||
* @return a line map indexing the start position of each line.
|
||||
*/
|
||||
public static final IntList lineMap(final byte[] buf, int ptr, int end) {
|
||||
int start = ptr;
|
||||
|
||||
// Experimentally derived from multiple source repositories
|
||||
// the average number of bytes/line is 36. Its a rough guess
|
||||
// to initially size our map close to the target.
|
||||
//
|
||||
final IntList map = new IntList((end - ptr) / 36);
|
||||
map.fillTo(1, Integer.MIN_VALUE);
|
||||
for (; ptr < end; ptr = nextLF(buf, ptr))
|
||||
IntList map = new IntList((end - ptr) / 36);
|
||||
map.add(Integer.MIN_VALUE);
|
||||
boolean foundLF = true;
|
||||
for (; ptr < end; ptr++) {
|
||||
if (foundLF) {
|
||||
map.add(ptr);
|
||||
}
|
||||
|
||||
if (buf[ptr] == '\0') {
|
||||
// binary data.
|
||||
map = new IntList(3);
|
||||
map.add(Integer.MIN_VALUE);
|
||||
map.add(start);
|
||||
break;
|
||||
}
|
||||
|
||||
foundLF = (buf[ptr] == '\n');
|
||||
}
|
||||
map.add(end);
|
||||
return map;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue