Introduce RawText#load.
This method creates a RawText from a blob, but avoids reading the blob if the start contains null bytes. This should reduce the amount of garbage that Gerrit produces for changes with binaries. Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Change-Id: Idd202d20251f2d1653e5f1ca374fe644c2cf205f
This commit is contained in:
parent
fbefe1e999
commit
ea2a4e3abe
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (C) 2017, Google Inc.
|
||||
* and other copyright owners as documented in the project's IP log.
|
||||
*
|
||||
* This program and the accompanying materials are made available
|
||||
* under the terms of the Eclipse Distribution License v1.0 which
|
||||
* accompanies this distribution, is reproduced below, and is
|
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||
* names of its contributors may be used to endorse or promote
|
||||
* products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package org.eclipse.jgit.diff;
|
||||
|
||||
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||
import org.eclipse.jgit.internal.storage.file.FileRepository;
|
||||
import org.eclipse.jgit.junit.RepositoryTestCase;
|
||||
import org.eclipse.jgit.lib.Constants;
|
||||
import org.eclipse.jgit.lib.ObjectId;
|
||||
import org.eclipse.jgit.lib.ObjectInserter;
|
||||
import org.eclipse.jgit.lib.ObjectLoader;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class RawTextLoadTest extends RepositoryTestCase {
|
||||
private static byte[] generate(int size, int nullAt) {
|
||||
byte[] data = new byte[size];
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
data[i] = (byte) ((i % 72 == 0) ? '\n' : (i%10) + '0');
|
||||
}
|
||||
if (nullAt >= 0) {
|
||||
data[nullAt] = '\0';
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
private RawText textFor(byte[] data, int limit) throws IOException, BinaryBlobException {
|
||||
FileRepository repo = createBareRepository();
|
||||
ObjectId id;
|
||||
try (ObjectInserter ins = repo.getObjectDatabase().newInserter()) {
|
||||
id = ins.insert(Constants.OBJ_BLOB, data);
|
||||
}
|
||||
ObjectLoader ldr = repo.open(id);
|
||||
return RawText.load(ldr, limit);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSmallOK() throws Exception {
|
||||
byte[] data = generate(1000, -1);
|
||||
RawText result = textFor(data, 1 << 20);
|
||||
Assert.assertArrayEquals(result.content, data);
|
||||
}
|
||||
|
||||
@Test(expected = BinaryBlobException.class)
|
||||
public void testSmallNull() throws Exception {
|
||||
byte[] data = generate(1000, 22);
|
||||
textFor(data, 1 << 20);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBigOK() throws Exception {
|
||||
byte[] data = generate(10000, -1);
|
||||
RawText result = textFor(data, 1 << 20);
|
||||
Assert.assertArrayEquals(result.content, data);
|
||||
}
|
||||
|
||||
@Test(expected = BinaryBlobException.class)
|
||||
public void testBigWithNullAtStart() throws Exception {
|
||||
byte[] data = generate(10000, 22);
|
||||
textFor(data, 1 << 20);
|
||||
}
|
||||
|
||||
@Test(expected = BinaryBlobException.class)
|
||||
public void testBinaryThreshold() throws Exception {
|
||||
byte[] data = generate(2 << 20, -1);
|
||||
textFor(data, 1 << 20);
|
||||
}
|
||||
}
|
|
@ -66,9 +66,9 @@
|
|||
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
||||
import org.eclipse.jgit.dircache.DirCacheIterator;
|
||||
import org.eclipse.jgit.errors.AmbiguousObjectException;
|
||||
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||
import org.eclipse.jgit.errors.LargeObjectException;
|
||||
import org.eclipse.jgit.errors.MissingObjectException;
|
||||
import org.eclipse.jgit.internal.JGitText;
|
||||
import org.eclipse.jgit.lib.AbbreviatedObjectId;
|
||||
|
@ -113,9 +113,6 @@ public class DiffFormatter implements AutoCloseable {
|
|||
/** Magic return content indicating it is empty or no content present. */
|
||||
private static final byte[] EMPTY = new byte[] {};
|
||||
|
||||
/** Magic return indicating the content is binary. */
|
||||
private static final byte[] BINARY = new byte[] {};
|
||||
|
||||
private final OutputStream out;
|
||||
|
||||
private ObjectReader reader;
|
||||
|
@ -954,47 +951,50 @@ private FormatResult createFormatResult(DiffEntry ent) throws IOException,
|
|||
// Content not changed (e.g. only mode, pure rename)
|
||||
editList = new EditList();
|
||||
type = PatchType.UNIFIED;
|
||||
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
||||
return res;
|
||||
}
|
||||
|
||||
assertHaveReader();
|
||||
|
||||
RawText aRaw = null;
|
||||
RawText bRaw = null;
|
||||
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
|
||||
aRaw = new RawText(writeGitLinkText(ent.getOldId()));
|
||||
bRaw = new RawText(writeGitLinkText(ent.getNewId()));
|
||||
} else {
|
||||
assertHaveReader();
|
||||
|
||||
byte[] aRaw, bRaw;
|
||||
|
||||
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
|
||||
aRaw = writeGitLinkText(ent.getOldId());
|
||||
bRaw = writeGitLinkText(ent.getNewId());
|
||||
} else {
|
||||
try {
|
||||
aRaw = open(OLD, ent);
|
||||
bRaw = open(NEW, ent);
|
||||
}
|
||||
|
||||
if (aRaw == BINARY || bRaw == BINARY //
|
||||
|| RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
|
||||
} catch (BinaryBlobException e) {
|
||||
// Do nothing; we check for null below.
|
||||
formatOldNewPaths(buf, ent);
|
||||
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
|
||||
editList = new EditList();
|
||||
type = PatchType.BINARY;
|
||||
|
||||
} else {
|
||||
res.a = new RawText(aRaw);
|
||||
res.b = new RawText(bRaw);
|
||||
editList = diff(res.a, res.b);
|
||||
type = PatchType.UNIFIED;
|
||||
|
||||
switch (ent.getChangeType()) {
|
||||
case RENAME:
|
||||
case COPY:
|
||||
if (!editList.isEmpty())
|
||||
formatOldNewPaths(buf, ent);
|
||||
break;
|
||||
|
||||
default:
|
||||
formatOldNewPaths(buf, ent);
|
||||
break;
|
||||
}
|
||||
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
res.a = aRaw;
|
||||
res.b = bRaw;
|
||||
editList = diff(res.a, res.b);
|
||||
type = PatchType.UNIFIED;
|
||||
|
||||
switch (ent.getChangeType()) {
|
||||
case RENAME:
|
||||
case COPY:
|
||||
if (!editList.isEmpty())
|
||||
formatOldNewPaths(buf, ent);
|
||||
break;
|
||||
|
||||
default:
|
||||
formatOldNewPaths(buf, ent);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
||||
return res;
|
||||
}
|
||||
|
@ -1009,13 +1009,13 @@ private void assertHaveReader() {
|
|||
}
|
||||
}
|
||||
|
||||
private byte[] open(DiffEntry.Side side, DiffEntry entry)
|
||||
throws IOException {
|
||||
private RawText open(DiffEntry.Side side, DiffEntry entry)
|
||||
throws IOException, BinaryBlobException {
|
||||
if (entry.getMode(side) == FileMode.MISSING)
|
||||
return EMPTY;
|
||||
return RawText.EMPTY_TEXT;
|
||||
|
||||
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
|
||||
return EMPTY;
|
||||
return RawText.EMPTY_TEXT;
|
||||
|
||||
AbbreviatedObjectId id = entry.getId(side);
|
||||
if (!id.isComplete()) {
|
||||
|
@ -1036,23 +1036,8 @@ private byte[] open(DiffEntry.Side side, DiffEntry entry)
|
|||
throw new AmbiguousObjectException(id, ids);
|
||||
}
|
||||
|
||||
try {
|
||||
ObjectLoader ldr = source.open(side, entry);
|
||||
return ldr.getBytes(binaryFileThreshold);
|
||||
|
||||
} catch (LargeObjectException.ExceedsLimit overLimit) {
|
||||
return BINARY;
|
||||
|
||||
} catch (LargeObjectException.ExceedsByteArrayLimit overLimit) {
|
||||
return BINARY;
|
||||
|
||||
} catch (LargeObjectException.OutOfMemory tooBig) {
|
||||
return BINARY;
|
||||
|
||||
} catch (LargeObjectException tooBig) {
|
||||
tooBig.setObjectId(id.toObjectId());
|
||||
throw tooBig;
|
||||
}
|
||||
ObjectLoader ldr = source.open(side, entry);
|
||||
return RawText.load(ldr, binaryFileThreshold);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -44,11 +44,15 @@
|
|||
|
||||
package org.eclipse.jgit.diff;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||
import org.eclipse.jgit.errors.LargeObjectException;
|
||||
import org.eclipse.jgit.lib.ObjectLoader;
|
||||
import org.eclipse.jgit.util.IO;
|
||||
import org.eclipse.jgit.util.IntList;
|
||||
import org.eclipse.jgit.util.RawParseUtils;
|
||||
|
@ -295,4 +299,65 @@ public String getLineDelimiter() {
|
|||
else
|
||||
return "\n"; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a blob object into RawText, or throw BinaryBlobException if
|
||||
* the blob is binary.
|
||||
*
|
||||
* @param ldr
|
||||
* the ObjectLoader for the blob
|
||||
* @param threshold
|
||||
* if the blob is larger than this size, it is always assumed to be binary.
|
||||
* @since 4.10
|
||||
* @return the RawText representing the blob.
|
||||
* @throws BinaryBlobException if the blob contains binary data.
|
||||
* @throws IOException if the input could not be read.
|
||||
*/
|
||||
public static RawText load(ObjectLoader ldr, int threshold) throws IOException, BinaryBlobException {
|
||||
long sz = ldr.getSize();
|
||||
|
||||
if (sz > threshold) {
|
||||
throw new BinaryBlobException();
|
||||
}
|
||||
|
||||
if (sz <= FIRST_FEW_BYTES) {
|
||||
byte[] data = ldr.getCachedBytes(FIRST_FEW_BYTES);
|
||||
if (isBinary(data)) {
|
||||
throw new BinaryBlobException();
|
||||
}
|
||||
return new RawText(data);
|
||||
}
|
||||
|
||||
byte[] head = new byte[FIRST_FEW_BYTES];
|
||||
try (InputStream stream = ldr.openStream()) {
|
||||
int off = 0;
|
||||
int left = head.length;
|
||||
while (left > 0) {
|
||||
int n = stream.read(head, off, left);
|
||||
if (n < 0) {
|
||||
throw new EOFException();
|
||||
}
|
||||
left -= n;
|
||||
|
||||
while (n > 0) {
|
||||
if (head[off] == '\0') {
|
||||
throw new BinaryBlobException();
|
||||
}
|
||||
off++;
|
||||
n--;
|
||||
}
|
||||
}
|
||||
|
||||
byte data[];
|
||||
try {
|
||||
data = new byte[(int)sz];
|
||||
} catch (OutOfMemoryError e) {
|
||||
throw new LargeObjectException.OutOfMemory(e);
|
||||
}
|
||||
|
||||
System.arraycopy(head, 0, data, 0, head.length);
|
||||
IO.readFully(stream, data, off, (int) (sz-off));
|
||||
return new RawText(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright (C) 2017 Google Inc.
|
||||
* and other copyright owners as documented in the project's IP log.
|
||||
*
|
||||
* This program and the accompanying materials are made available
|
||||
* under the terms of the Eclipse Distribution License v1.0 which
|
||||
* accompanies this distribution, is reproduced below, and is
|
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||
* names of its contributors may be used to endorse or promote
|
||||
* products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package org.eclipse.jgit.errors;
|
||||
|
||||
/**
|
||||
* BinaryBlobException is used to signal that binary data was found
|
||||
* in a context that requires text (eg. for generating textual diffs).
|
||||
*
|
||||
* @since 4.10
|
||||
*/
|
||||
public class BinaryBlobException extends Exception {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/**
|
||||
* Construct a BinaryBlobException.
|
||||
*/
|
||||
public BinaryBlobException() {}
|
||||
}
|
Loading…
Reference in New Issue