Merge changes I424295df,Ib003f7c8

* changes:
  Treat RawText of binary data as file with one single line.
  Trim boilerplate in RawParseUtils_LineMapTest.
This commit is contained in:
David Pursehouse 2017-08-01 10:18:48 -04:00 committed by Gerrit Code Review @ Eclipse.org
commit 4085646f6d
2 changed files with 48 additions and 29 deletions

View File

@ -43,6 +43,7 @@
package org.eclipse.jgit.util; package org.eclipse.jgit.util;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
@ -55,52 +56,51 @@ public class RawParseUtils_LineMapTest {
public void testEmpty() { public void testEmpty() {
final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0); final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
assertNotNull(map); assertNotNull(map);
assertEquals(2, map.size()); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map));
assertEquals(Integer.MIN_VALUE, map.get(0));
assertEquals(0, map.get(1));
} }
@Test @Test
public void testOneBlankLine() { public void testOneBlankLine() {
final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1); final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
assertEquals(3, map.size()); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map));
assertEquals(Integer.MIN_VALUE, map.get(0));
assertEquals(0, map.get(1));
assertEquals(1, map.get(2));
} }
@Test @Test
public void testTwoLineFooBar() throws UnsupportedEncodingException { public void testTwoLineFooBar() throws UnsupportedEncodingException {
final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1"); final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertEquals(4, map.size()); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
assertEquals(Integer.MIN_VALUE, map.get(0));
assertEquals(0, map.get(1));
assertEquals(4, map.get(2));
assertEquals(buf.length, map.get(3));
} }
@Test @Test
public void testTwoLineNoLF() throws UnsupportedEncodingException { public void testTwoLineNoLF() throws UnsupportedEncodingException {
final byte[] buf = "foo\nbar".getBytes("ISO-8859-1"); final byte[] buf = "foo\nbar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertEquals(4, map.size()); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
assertEquals(Integer.MIN_VALUE, map.get(0)); }
assertEquals(0, map.get(1));
assertEquals(4, map.get(2)); @Test
assertEquals(buf.length, map.get(3)); public void testBinary() throws UnsupportedEncodingException {
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 3, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
} }
@Test @Test
public void testFourLineBlanks() throws UnsupportedEncodingException { public void testFourLineBlanks() throws UnsupportedEncodingException {
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertEquals(6, map.size());
assertEquals(Integer.MIN_VALUE, map.get(0)); assertArrayEquals(new int[]{
assertEquals(0, map.get(1)); Integer.MIN_VALUE, 0, 4, 5, 6, buf.length
assertEquals(4, map.get(2)); }, asInts(map));
assertEquals(5, map.get(3)); }
assertEquals(6, map.get(4));
assertEquals(buf.length, map.get(5)); private int[] asInts(IntList l) {
int[] result = new int[l.size()];
for (int i = 0; i < l.size(); i++) {
result[i] = l.get(i);
}
return result;
} }
} }

View File

@ -618,6 +618,10 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
* <p> * <p>
* The last element (index <code>map.size()-1</code>) always contains * The last element (index <code>map.size()-1</code>) always contains
* <code>end</code>. * <code>end</code>.
* <p>
* If the data contains a '\0' anywhere, the whole region is considered binary
* and a LineMap corresponding to a single line is returned.
* </p>
* *
* @param buf * @param buf
* buffer to scan. * buffer to scan.
@ -629,14 +633,29 @@ public static final int prevLF(final byte[] b, int ptr, final char chrA) {
* @return a line map indexing the start position of each line. * @return a line map indexing the start position of each line.
*/ */
public static final IntList lineMap(final byte[] buf, int ptr, int end) { public static final IntList lineMap(final byte[] buf, int ptr, int end) {
int start = ptr;
// Experimentally derived from multiple source repositories // Experimentally derived from multiple source repositories
// the average number of bytes/line is 36. It's a rough guess // the average number of bytes/line is 36. It's a rough guess
// to initially size our map close to the target. // to initially size our map close to the target.
// IntList map = new IntList((end - ptr) / 36);
final IntList map = new IntList((end - ptr) / 36); map.add(Integer.MIN_VALUE);
map.fillTo(1, Integer.MIN_VALUE); boolean foundLF = true;
for (; ptr < end; ptr = nextLF(buf, ptr)) for (; ptr < end; ptr++) {
map.add(ptr); if (foundLF) {
map.add(ptr);
}
if (buf[ptr] == '\0') {
// binary data.
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(start);
break;
}
foundLF = (buf[ptr] == '\n');
}
map.add(end); map.add(end);
return map; return map;
} }