Introduce RawText#load.

This method creates a RawText from a blob, but avoids reading the blob
if the start contains null bytes. This should reduce the amount of
garbage that Gerrit produces for changes with binaries.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: Idd202d20251f2d1653e5f1ca374fe644c2cf205f
This commit is contained in:
Han-Wen Nienhuys 2017-10-09 18:11:04 +02:00
parent fbefe1e999
commit ea2a4e3abe
4 changed files with 273 additions and 55 deletions

View File

@ -0,0 +1,110 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.junit.RepositoryTestCase;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectLoader;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
public class RawTextLoadTest extends RepositoryTestCase {
private static byte[] generate(int size, int nullAt) {
byte[] data = new byte[size];
for (int i = 0; i < data.length; i++) {
data[i] = (byte) ((i % 72 == 0) ? '\n' : (i%10) + '0');
}
if (nullAt >= 0) {
data[nullAt] = '\0';
}
return data;
}
private RawText textFor(byte[] data, int limit) throws IOException, BinaryBlobException {
FileRepository repo = createBareRepository();
ObjectId id;
try (ObjectInserter ins = repo.getObjectDatabase().newInserter()) {
id = ins.insert(Constants.OBJ_BLOB, data);
}
ObjectLoader ldr = repo.open(id);
return RawText.load(ldr, limit);
}
@Test
public void testSmallOK() throws Exception {
byte[] data = generate(1000, -1);
RawText result = textFor(data, 1 << 20);
Assert.assertArrayEquals(result.content, data);
}
@Test(expected = BinaryBlobException.class)
public void testSmallNull() throws Exception {
byte[] data = generate(1000, 22);
textFor(data, 1 << 20);
}
@Test
public void testBigOK() throws Exception {
byte[] data = generate(10000, -1);
RawText result = textFor(data, 1 << 20);
Assert.assertArrayEquals(result.content, data);
}
@Test(expected = BinaryBlobException.class)
public void testBigWithNullAtStart() throws Exception {
byte[] data = generate(10000, 22);
textFor(data, 1 << 20);
}
@Test(expected = BinaryBlobException.class)
public void testBinaryThreshold() throws Exception {
byte[] data = generate(2 << 20, -1);
textFor(data, 1 << 20);
}
}

View File

@ -66,9 +66,9 @@
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.dircache.DirCacheIterator;
import org.eclipse.jgit.errors.AmbiguousObjectException;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
@ -113,9 +113,6 @@ public class DiffFormatter implements AutoCloseable {
/** Magic return content indicating it is empty or no content present. */
private static final byte[] EMPTY = new byte[] {};
/** Magic return indicating the content is binary. */
private static final byte[] BINARY = new byte[] {};
private final OutputStream out;
private ObjectReader reader;
@ -954,47 +951,50 @@ private FormatResult createFormatResult(DiffEntry ent) throws IOException,
// Content not changed (e.g. only mode, pure rename)
editList = new EditList();
type = PatchType.UNIFIED;
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
assertHaveReader();
RawText aRaw = null;
RawText bRaw = null;
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
aRaw = new RawText(writeGitLinkText(ent.getOldId()));
bRaw = new RawText(writeGitLinkText(ent.getNewId()));
} else {
assertHaveReader();
byte[] aRaw, bRaw;
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
aRaw = writeGitLinkText(ent.getOldId());
bRaw = writeGitLinkText(ent.getNewId());
} else {
try {
aRaw = open(OLD, ent);
bRaw = open(NEW, ent);
}
if (aRaw == BINARY || bRaw == BINARY //
|| RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
} catch (BinaryBlobException e) {
// Do nothing; we check for null below.
formatOldNewPaths(buf, ent);
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
editList = new EditList();
type = PatchType.BINARY;
} else {
res.a = new RawText(aRaw);
res.b = new RawText(bRaw);
editList = diff(res.a, res.b);
type = PatchType.UNIFIED;
switch (ent.getChangeType()) {
case RENAME:
case COPY:
if (!editList.isEmpty())
formatOldNewPaths(buf, ent);
break;
default:
formatOldNewPaths(buf, ent);
break;
}
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
}
res.a = aRaw;
res.b = bRaw;
editList = diff(res.a, res.b);
type = PatchType.UNIFIED;
switch (ent.getChangeType()) {
case RENAME:
case COPY:
if (!editList.isEmpty())
formatOldNewPaths(buf, ent);
break;
default:
formatOldNewPaths(buf, ent);
break;
}
res.header = new FileHeader(buf.toByteArray(), editList, type);
return res;
}
@ -1009,13 +1009,13 @@ private void assertHaveReader() {
}
}
private byte[] open(DiffEntry.Side side, DiffEntry entry)
throws IOException {
private RawText open(DiffEntry.Side side, DiffEntry entry)
throws IOException, BinaryBlobException {
if (entry.getMode(side) == FileMode.MISSING)
return EMPTY;
return RawText.EMPTY_TEXT;
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
return EMPTY;
return RawText.EMPTY_TEXT;
AbbreviatedObjectId id = entry.getId(side);
if (!id.isComplete()) {
@ -1036,23 +1036,8 @@ private byte[] open(DiffEntry.Side side, DiffEntry entry)
throw new AmbiguousObjectException(id, ids);
}
try {
ObjectLoader ldr = source.open(side, entry);
return ldr.getBytes(binaryFileThreshold);
} catch (LargeObjectException.ExceedsLimit overLimit) {
return BINARY;
} catch (LargeObjectException.ExceedsByteArrayLimit overLimit) {
return BINARY;
} catch (LargeObjectException.OutOfMemory tooBig) {
return BINARY;
} catch (LargeObjectException tooBig) {
tooBig.setObjectId(id.toObjectId());
throw tooBig;
}
ObjectLoader ldr = source.open(side, entry);
return RawText.load(ldr, binaryFileThreshold);
}
/**

View File

@ -44,11 +44,15 @@
package org.eclipse.jgit.diff;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.RawParseUtils;
@ -295,4 +299,65 @@ public String getLineDelimiter() {
else
return "\n"; //$NON-NLS-1$
}
/**
* Read a blob object into RawText, or throw BinaryBlobException if
* the blob is binary.
*
* @param ldr
* the ObjectLoader for the blob
* @param threshold
* if the blob is larger than this size, it is always assumed to be binary.
* @since 4.10
* @return the RawText representing the blob.
* @throws BinaryBlobException if the blob contains binary data.
* @throws IOException if the input could not be read.
*/
public static RawText load(ObjectLoader ldr, int threshold) throws IOException, BinaryBlobException {
long sz = ldr.getSize();
if (sz > threshold) {
throw new BinaryBlobException();
}
if (sz <= FIRST_FEW_BYTES) {
byte[] data = ldr.getCachedBytes(FIRST_FEW_BYTES);
if (isBinary(data)) {
throw new BinaryBlobException();
}
return new RawText(data);
}
byte[] head = new byte[FIRST_FEW_BYTES];
try (InputStream stream = ldr.openStream()) {
int off = 0;
int left = head.length;
while (left > 0) {
int n = stream.read(head, off, left);
if (n < 0) {
throw new EOFException();
}
left -= n;
while (n > 0) {
if (head[off] == '\0') {
throw new BinaryBlobException();
}
off++;
n--;
}
}
byte data[];
try {
data = new byte[(int)sz];
} catch (OutOfMemoryError e) {
throw new LargeObjectException.OutOfMemory(e);
}
System.arraycopy(head, 0, data, 0, head.length);
IO.readFully(stream, data, off, (int) (sz-off));
return new RawText(data);
}
}
}

View File

@ -0,0 +1,58 @@
/*
* Copyright (C) 2017 Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.errors;
/**
* BinaryBlobException is used to signal that binary data was found
* in a context that requires text (eg. for generating textual diffs).
*
* @since 4.10
*/
public class BinaryBlobException extends Exception {
private static final long serialVersionUID = 1L;
/**
* Construct a BinaryBlobException.
*/
public BinaryBlobException() {}
}