From a551b64694c24fff58014ae5ca298b47539cf96d Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Thu, 27 Jul 2017 13:58:49 +0200 Subject: [PATCH] Treat RawText of binary data as file with one single line. This avoids executing mergeAlgorithm.merge on binary data, which is unlikely to be useful. Arguably, binary data should not make it to ResolveMerger#contentMerge, but this approach has the following advantages: * binary detection is exact, since it doesn't only look at the start of the blob. * it is cheap, as we have to iterate over the bytes anyway to find '\n'. Signed-off-by: Han-Wen Nienhuys Change-Id: I424295df1dc60a719859d9d7c599067891b15792 --- .../jgit/util/RawParseUtils_LineMapTest.java | 7 +++++ .../org/eclipse/jgit/util/RawParseUtils.java | 29 +++++++++++++++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java index e8e191d78..2e9cbb503 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java @@ -79,6 +79,13 @@ public void testTwoLineNoLF() throws UnsupportedEncodingException { assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); } + @Test + public void testBinary() throws UnsupportedEncodingException { + final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1"); + final IntList map = RawParseUtils.lineMap(buf, 3, buf.length); + assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map)); + } + @Test public void testFourLineBlanks() throws UnsupportedEncodingException { final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java index 86777b9cd..ad138bbf1 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java @@ -618,6 +618,10 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) { *

* The last element (index map.size()-1) always contains * end. + *

+ * If the data contains a '\0' anywhere, the whole region is considered binary + * and a LineMap corresponding to a single line is returned. + *

* * @param buf * buffer to scan. @@ -629,14 +633,29 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) { * @return a line map indexing the start position of each line. */ public static final IntList lineMap(final byte[] buf, int ptr, int end) { + int start = ptr; + // Experimentally derived from multiple source repositories // the average number of bytes/line is 36. Its a rough guess // to initially size our map close to the target. - // - final IntList map = new IntList((end - ptr) / 36); - map.fillTo(1, Integer.MIN_VALUE); - for (; ptr < end; ptr = nextLF(buf, ptr)) - map.add(ptr); + IntList map = new IntList((end - ptr) / 36); + map.add(Integer.MIN_VALUE); + boolean foundLF = true; + for (; ptr < end; ptr++) { + if (foundLF) { + map.add(ptr); + } + + if (buf[ptr] == '\0') { + // binary data. + map = new IntList(3); + map.add(Integer.MIN_VALUE); + map.add(start); + break; + } + + foundLF = (buf[ptr] == '\n'); + } map.add(end); return map; }