Introduce RawText#load.
This method creates a RawText from a blob, but avoids reading the whole blob if its first few bytes contain a NUL byte. This should reduce the amount of garbage that Gerrit produces for changes with binaries.
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: Idd202d20251f2d1653e5f1ca374fe644c2cf205f
This commit is contained in:
parent
fbefe1e999
commit
ea2a4e3abe
|
@ -0,0 +1,110 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2017, Google Inc.
|
||||||
|
* and other copyright owners as documented in the project's IP log.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available
|
||||||
|
* under the terms of the Eclipse Distribution License v1.0 which
|
||||||
|
* accompanies this distribution, is reproduced below, and is
|
||||||
|
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||||
|
*
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or
|
||||||
|
* without modification, are permitted provided that the following
|
||||||
|
* conditions are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following
|
||||||
|
* disclaimer in the documentation and/or other materials provided
|
||||||
|
* with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||||
|
* names of its contributors may be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior
|
||||||
|
* written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||||
|
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||||
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.eclipse.jgit.diff;
|
||||||
|
|
||||||
|
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||||
|
import org.eclipse.jgit.internal.storage.file.FileRepository;
|
||||||
|
import org.eclipse.jgit.junit.RepositoryTestCase;
|
||||||
|
import org.eclipse.jgit.lib.Constants;
|
||||||
|
import org.eclipse.jgit.lib.ObjectId;
|
||||||
|
import org.eclipse.jgit.lib.ObjectInserter;
|
||||||
|
import org.eclipse.jgit.lib.ObjectLoader;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class RawTextLoadTest extends RepositoryTestCase {
|
||||||
|
private static byte[] generate(int size, int nullAt) {
|
||||||
|
byte[] data = new byte[size];
|
||||||
|
for (int i = 0; i < data.length; i++) {
|
||||||
|
data[i] = (byte) ((i % 72 == 0) ? '\n' : (i%10) + '0');
|
||||||
|
}
|
||||||
|
if (nullAt >= 0) {
|
||||||
|
data[nullAt] = '\0';
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
private RawText textFor(byte[] data, int limit) throws IOException, BinaryBlobException {
|
||||||
|
FileRepository repo = createBareRepository();
|
||||||
|
ObjectId id;
|
||||||
|
try (ObjectInserter ins = repo.getObjectDatabase().newInserter()) {
|
||||||
|
id = ins.insert(Constants.OBJ_BLOB, data);
|
||||||
|
}
|
||||||
|
ObjectLoader ldr = repo.open(id);
|
||||||
|
return RawText.load(ldr, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSmallOK() throws Exception {
|
||||||
|
byte[] data = generate(1000, -1);
|
||||||
|
RawText result = textFor(data, 1 << 20);
|
||||||
|
Assert.assertArrayEquals(result.content, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = BinaryBlobException.class)
|
||||||
|
public void testSmallNull() throws Exception {
|
||||||
|
byte[] data = generate(1000, 22);
|
||||||
|
textFor(data, 1 << 20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBigOK() throws Exception {
|
||||||
|
byte[] data = generate(10000, -1);
|
||||||
|
RawText result = textFor(data, 1 << 20);
|
||||||
|
Assert.assertArrayEquals(result.content, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = BinaryBlobException.class)
|
||||||
|
public void testBigWithNullAtStart() throws Exception {
|
||||||
|
byte[] data = generate(10000, 22);
|
||||||
|
textFor(data, 1 << 20);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = BinaryBlobException.class)
|
||||||
|
public void testBinaryThreshold() throws Exception {
|
||||||
|
byte[] data = generate(2 << 20, -1);
|
||||||
|
textFor(data, 1 << 20);
|
||||||
|
}
|
||||||
|
}
|
|
@ -66,9 +66,9 @@
|
||||||
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
||||||
import org.eclipse.jgit.dircache.DirCacheIterator;
|
import org.eclipse.jgit.dircache.DirCacheIterator;
|
||||||
import org.eclipse.jgit.errors.AmbiguousObjectException;
|
import org.eclipse.jgit.errors.AmbiguousObjectException;
|
||||||
|
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||||
import org.eclipse.jgit.errors.CorruptObjectException;
|
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||||
import org.eclipse.jgit.errors.LargeObjectException;
|
|
||||||
import org.eclipse.jgit.errors.MissingObjectException;
|
import org.eclipse.jgit.errors.MissingObjectException;
|
||||||
import org.eclipse.jgit.internal.JGitText;
|
import org.eclipse.jgit.internal.JGitText;
|
||||||
import org.eclipse.jgit.lib.AbbreviatedObjectId;
|
import org.eclipse.jgit.lib.AbbreviatedObjectId;
|
||||||
|
@ -113,9 +113,6 @@ public class DiffFormatter implements AutoCloseable {
|
||||||
/** Magic return content indicating it is empty or no content present. */
|
/** Magic return content indicating it is empty or no content present. */
|
||||||
private static final byte[] EMPTY = new byte[] {};
|
private static final byte[] EMPTY = new byte[] {};
|
||||||
|
|
||||||
/** Magic return indicating the content is binary. */
|
|
||||||
private static final byte[] BINARY = new byte[] {};
|
|
||||||
|
|
||||||
private final OutputStream out;
|
private final OutputStream out;
|
||||||
|
|
||||||
private ObjectReader reader;
|
private ObjectReader reader;
|
||||||
|
@ -954,47 +951,50 @@ private FormatResult createFormatResult(DiffEntry ent) throws IOException,
|
||||||
// Content not changed (e.g. only mode, pure rename)
|
// Content not changed (e.g. only mode, pure rename)
|
||||||
editList = new EditList();
|
editList = new EditList();
|
||||||
type = PatchType.UNIFIED;
|
type = PatchType.UNIFIED;
|
||||||
|
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertHaveReader();
|
||||||
|
|
||||||
|
RawText aRaw = null;
|
||||||
|
RawText bRaw = null;
|
||||||
|
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
|
||||||
|
aRaw = new RawText(writeGitLinkText(ent.getOldId()));
|
||||||
|
bRaw = new RawText(writeGitLinkText(ent.getNewId()));
|
||||||
} else {
|
} else {
|
||||||
assertHaveReader();
|
try {
|
||||||
|
|
||||||
byte[] aRaw, bRaw;
|
|
||||||
|
|
||||||
if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
|
|
||||||
aRaw = writeGitLinkText(ent.getOldId());
|
|
||||||
bRaw = writeGitLinkText(ent.getNewId());
|
|
||||||
} else {
|
|
||||||
aRaw = open(OLD, ent);
|
aRaw = open(OLD, ent);
|
||||||
bRaw = open(NEW, ent);
|
bRaw = open(NEW, ent);
|
||||||
}
|
} catch (BinaryBlobException e) {
|
||||||
|
// Do nothing; we check for null below.
|
||||||
if (aRaw == BINARY || bRaw == BINARY //
|
|
||||||
|| RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
|
|
||||||
formatOldNewPaths(buf, ent);
|
formatOldNewPaths(buf, ent);
|
||||||
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
|
buf.write(encodeASCII("Binary files differ\n")); //$NON-NLS-1$
|
||||||
editList = new EditList();
|
editList = new EditList();
|
||||||
type = PatchType.BINARY;
|
type = PatchType.BINARY;
|
||||||
|
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
||||||
} else {
|
return res;
|
||||||
res.a = new RawText(aRaw);
|
|
||||||
res.b = new RawText(bRaw);
|
|
||||||
editList = diff(res.a, res.b);
|
|
||||||
type = PatchType.UNIFIED;
|
|
||||||
|
|
||||||
switch (ent.getChangeType()) {
|
|
||||||
case RENAME:
|
|
||||||
case COPY:
|
|
||||||
if (!editList.isEmpty())
|
|
||||||
formatOldNewPaths(buf, ent);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
formatOldNewPaths(buf, ent);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
res.a = aRaw;
|
||||||
|
res.b = bRaw;
|
||||||
|
editList = diff(res.a, res.b);
|
||||||
|
type = PatchType.UNIFIED;
|
||||||
|
|
||||||
|
switch (ent.getChangeType()) {
|
||||||
|
case RENAME:
|
||||||
|
case COPY:
|
||||||
|
if (!editList.isEmpty())
|
||||||
|
formatOldNewPaths(buf, ent);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
formatOldNewPaths(buf, ent);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
res.header = new FileHeader(buf.toByteArray(), editList, type);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -1009,13 +1009,13 @@ private void assertHaveReader() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] open(DiffEntry.Side side, DiffEntry entry)
|
private RawText open(DiffEntry.Side side, DiffEntry entry)
|
||||||
throws IOException {
|
throws IOException, BinaryBlobException {
|
||||||
if (entry.getMode(side) == FileMode.MISSING)
|
if (entry.getMode(side) == FileMode.MISSING)
|
||||||
return EMPTY;
|
return RawText.EMPTY_TEXT;
|
||||||
|
|
||||||
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
|
if (entry.getMode(side).getObjectType() != Constants.OBJ_BLOB)
|
||||||
return EMPTY;
|
return RawText.EMPTY_TEXT;
|
||||||
|
|
||||||
AbbreviatedObjectId id = entry.getId(side);
|
AbbreviatedObjectId id = entry.getId(side);
|
||||||
if (!id.isComplete()) {
|
if (!id.isComplete()) {
|
||||||
|
@ -1036,23 +1036,8 @@ private byte[] open(DiffEntry.Side side, DiffEntry entry)
|
||||||
throw new AmbiguousObjectException(id, ids);
|
throw new AmbiguousObjectException(id, ids);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
ObjectLoader ldr = source.open(side, entry);
|
||||||
ObjectLoader ldr = source.open(side, entry);
|
return RawText.load(ldr, binaryFileThreshold);
|
||||||
return ldr.getBytes(binaryFileThreshold);
|
|
||||||
|
|
||||||
} catch (LargeObjectException.ExceedsLimit overLimit) {
|
|
||||||
return BINARY;
|
|
||||||
|
|
||||||
} catch (LargeObjectException.ExceedsByteArrayLimit overLimit) {
|
|
||||||
return BINARY;
|
|
||||||
|
|
||||||
} catch (LargeObjectException.OutOfMemory tooBig) {
|
|
||||||
return BINARY;
|
|
||||||
|
|
||||||
} catch (LargeObjectException tooBig) {
|
|
||||||
tooBig.setObjectId(id.toObjectId());
|
|
||||||
throw tooBig;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -44,11 +44,15 @@
|
||||||
|
|
||||||
package org.eclipse.jgit.diff;
|
package org.eclipse.jgit.diff;
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
|
||||||
|
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||||
|
import org.eclipse.jgit.errors.LargeObjectException;
|
||||||
|
import org.eclipse.jgit.lib.ObjectLoader;
|
||||||
import org.eclipse.jgit.util.IO;
|
import org.eclipse.jgit.util.IO;
|
||||||
import org.eclipse.jgit.util.IntList;
|
import org.eclipse.jgit.util.IntList;
|
||||||
import org.eclipse.jgit.util.RawParseUtils;
|
import org.eclipse.jgit.util.RawParseUtils;
|
||||||
|
@ -295,4 +299,65 @@ public String getLineDelimiter() {
|
||||||
else
|
else
|
||||||
return "\n"; //$NON-NLS-1$
|
return "\n"; //$NON-NLS-1$
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read a blob object into RawText, or throw BinaryBlobException if
|
||||||
|
* the blob is binary.
|
||||||
|
*
|
||||||
|
* @param ldr
|
||||||
|
* the ObjectLoader for the blob
|
||||||
|
* @param threshold
|
||||||
|
* if the blob is larger than this size, it is always assumed to be binary.
|
||||||
|
* @since 4.10
|
||||||
|
* @return the RawText representing the blob.
|
||||||
|
* @throws BinaryBlobException if the blob contains binary data.
|
||||||
|
* @throws IOException if the input could not be read.
|
||||||
|
*/
|
||||||
|
public static RawText load(ObjectLoader ldr, int threshold) throws IOException, BinaryBlobException {
|
||||||
|
long sz = ldr.getSize();
|
||||||
|
|
||||||
|
if (sz > threshold) {
|
||||||
|
throw new BinaryBlobException();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sz <= FIRST_FEW_BYTES) {
|
||||||
|
byte[] data = ldr.getCachedBytes(FIRST_FEW_BYTES);
|
||||||
|
if (isBinary(data)) {
|
||||||
|
throw new BinaryBlobException();
|
||||||
|
}
|
||||||
|
return new RawText(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] head = new byte[FIRST_FEW_BYTES];
|
||||||
|
try (InputStream stream = ldr.openStream()) {
|
||||||
|
int off = 0;
|
||||||
|
int left = head.length;
|
||||||
|
while (left > 0) {
|
||||||
|
int n = stream.read(head, off, left);
|
||||||
|
if (n < 0) {
|
||||||
|
throw new EOFException();
|
||||||
|
}
|
||||||
|
left -= n;
|
||||||
|
|
||||||
|
while (n > 0) {
|
||||||
|
if (head[off] == '\0') {
|
||||||
|
throw new BinaryBlobException();
|
||||||
|
}
|
||||||
|
off++;
|
||||||
|
n--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte data[];
|
||||||
|
try {
|
||||||
|
data = new byte[(int)sz];
|
||||||
|
} catch (OutOfMemoryError e) {
|
||||||
|
throw new LargeObjectException.OutOfMemory(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
System.arraycopy(head, 0, data, 0, head.length);
|
||||||
|
IO.readFully(stream, data, off, (int) (sz-off));
|
||||||
|
return new RawText(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2017 Google Inc.
|
||||||
|
* and other copyright owners as documented in the project's IP log.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available
|
||||||
|
* under the terms of the Eclipse Distribution License v1.0 which
|
||||||
|
* accompanies this distribution, is reproduced below, and is
|
||||||
|
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||||
|
*
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or
|
||||||
|
* without modification, are permitted provided that the following
|
||||||
|
* conditions are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following
|
||||||
|
* disclaimer in the documentation and/or other materials provided
|
||||||
|
* with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||||
|
* names of its contributors may be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior
|
||||||
|
* written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||||
|
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||||
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.eclipse.jgit.errors;
|
||||||
|
|
||||||
|
/**
 * Signals that binary data was encountered where text is required, e.g.
 * while generating a textual diff.
 *
 * @since 4.10
 */
public class BinaryBlobException extends Exception {
	private static final long serialVersionUID = 1L;

	/**
	 * Create a BinaryBlobException without a detail message or cause.
	 */
	public BinaryBlobException() {
		super();
	}
}
|
Loading…
Reference in New Issue