From 2292655e9ecab7305c4f481f9c1a9463a383e834 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Tue, 24 Aug 2010 12:46:56 -0700 Subject: [PATCH] Add brute force byte array loading to ObjectLoader Some algorithms are coded in a way that requires us to provide them the entire object contents as a contiguous byte array. The parsers in RevCommit and RevTag, or our RawText objects are really good examples of these. Instead of duplicating this logic everywhere, let's put it into the base ObjectLoader type. That way the caller only needs to give us their upper size bound, and we'll do the rest of the heavy work to figure out if the object still fits within that bound, and get them an array that has the complete contents. Change-Id: Id95a7f79d2b97e39f6949370ccca2f2c9cfb1a0f Signed-off-by: Shawn O. Pearce Signed-off-by: Chris Aniszczyk --- .../eclipse/jgit/lib/ObjectLoaderTest.java | 270 ++++++++++++++++++ .../org/eclipse/jgit/lib/ObjectLoader.java | 100 ++++++- .../org/eclipse/jgit/lib/ObjectStream.java | 16 +- 3 files changed, 380 insertions(+), 6 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectLoaderTest.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectLoaderTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectLoaderTest.java new file mode 100644 index 000000000..0280b4a45 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/ObjectLoaderTest.java @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.lib; + +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.junit.TestRng; + +public class ObjectLoaderTest extends TestCase { + private TestRng rng; + + protected void setUp() throws Exception { + super.setUp(); + rng = new TestRng(getName()); + } + + public void testSmallObjectLoader() throws MissingObjectException, + IOException { + final byte[] act = rng.nextBytes(512); + final ObjectLoader ldr = new ObjectLoader.SmallObject(OBJ_BLOB, act); + + assertEquals(OBJ_BLOB, ldr.getType()); + assertEquals(act.length, ldr.getSize()); + assertFalse("not is large", ldr.isLarge()); + assertSame(act, ldr.getCachedBytes()); + assertSame(act, ldr.getCachedBytes(1)); + assertSame(act, ldr.getCachedBytes(Integer.MAX_VALUE)); + + byte[] copy = ldr.getBytes(); + assertNotSame(act, copy); + assertTrue("same content", Arrays.equals(act, copy)); + + copy = ldr.getBytes(1); + assertNotSame(act, copy); + assertTrue("same content", Arrays.equals(act, copy)); + + copy = ldr.getBytes(Integer.MAX_VALUE); + assertNotSame(act, copy); + assertTrue("same content", Arrays.equals(act, copy)); + + ObjectStream in = ldr.openStream(); + assertNotNull("has stream", in); + assertTrue("is small stream", in instanceof ObjectStream.SmallStream); + assertEquals(OBJ_BLOB, in.getType()); + assertEquals(act.length, in.getSize()); + assertEquals(act.length, in.available()); + assertTrue("mark supported", in.markSupported()); + copy = new byte[act.length]; + assertEquals(act.length, in.read(copy)); + assertEquals(0, in.available()); + assertEquals(-1, in.read()); + assertTrue("same content", Arrays.equals(act, copy)); + + ByteArrayOutputStream tmp = new 
ByteArrayOutputStream(); + ldr.copyTo(tmp); + assertTrue("same content", Arrays.equals(act, tmp.toByteArray())); + } + + public void testLargeObjectLoader() throws MissingObjectException, + IOException { + final byte[] act = rng.nextBytes(512); + final ObjectLoader ldr = new ObjectLoader() { + @Override + public byte[] getCachedBytes() throws LargeObjectException { + throw new LargeObjectException(); + } + + @Override + public long getSize() { + return act.length; + } + + @Override + public int getType() { + return OBJ_BLOB; + } + + @Override + public ObjectStream openStream() throws MissingObjectException, + IOException { + return new ObjectStream.Filter(getType(), act.length, + new ByteArrayInputStream(act)); + } + }; + + assertEquals(OBJ_BLOB, ldr.getType()); + assertEquals(act.length, ldr.getSize()); + assertTrue("is large", ldr.isLarge()); + + try { + ldr.getCachedBytes(); + fail("did not throw on getCachedBytes()"); + } catch (LargeObjectException tooBig) { + // expected + } + + try { + ldr.getBytes(); + fail("did not throw on getBytes()"); + } catch (LargeObjectException tooBig) { + // expected + } + + try { + ldr.getCachedBytes(64); + fail("did not throw on getCachedBytes(64)"); + } catch (LargeObjectException tooBig) { + // expected + } + + byte[] copy = ldr.getCachedBytes(1024); + assertNotSame(act, copy); + assertTrue("same content", Arrays.equals(act, copy)); + + ObjectStream in = ldr.openStream(); + assertNotNull("has stream", in); + assertEquals(OBJ_BLOB, in.getType()); + assertEquals(act.length, in.getSize()); + assertEquals(act.length, in.available()); + assertTrue("mark supported", in.markSupported()); + copy = new byte[act.length]; + assertEquals(act.length, in.read(copy)); + assertEquals(0, in.available()); + assertEquals(-1, in.read()); + assertTrue("same content", Arrays.equals(act, copy)); + + ByteArrayOutputStream tmp = new ByteArrayOutputStream(); + ldr.copyTo(tmp); + assertTrue("same content", Arrays.equals(act, tmp.toByteArray())); + } + + 
public void testLimitedGetCachedBytes() throws LargeObjectException, + MissingObjectException, IOException { + byte[] act = rng.nextBytes(512); + ObjectLoader ldr = new ObjectLoader.SmallObject(OBJ_BLOB, act) { + @Override + public boolean isLarge() { + return true; + } + }; + assertTrue("is large", ldr.isLarge()); + + try { + ldr.getCachedBytes(10); + fail("Did not throw LargeObjectException"); + } catch (LargeObjectException tooBig) { + // Expected result. + } + + byte[] copy = ldr.getCachedBytes(512); + assertNotSame(act, copy); + assertTrue("same content", Arrays.equals(act, copy)); + + copy = ldr.getCachedBytes(1024); + assertNotSame(act, copy); + assertTrue("same content", Arrays.equals(act, copy)); + } + + public void testLimitedGetCachedBytesExceedsJavaLimits() + throws LargeObjectException, MissingObjectException, IOException { + ObjectLoader ldr = new ObjectLoader() { + @Override + public boolean isLarge() { + return true; + } + + @Override + public byte[] getCachedBytes() throws LargeObjectException { + throw new LargeObjectException(); + } + + @Override + public long getSize() { + return Long.MAX_VALUE; + } + + @Override + public int getType() { + return OBJ_BLOB; + } + + @Override + public ObjectStream openStream() throws MissingObjectException, + IOException { + return new ObjectStream() { + @Override + public long getSize() { + return Long.MAX_VALUE; + } + + @Override + public int getType() { + return OBJ_BLOB; + } + + @Override + public int read() throws IOException { + fail("never should have reached read"); + return -1; + } + }; + } + }; + assertTrue("is large", ldr.isLarge()); + + try { + ldr.getCachedBytes(10); + fail("Did not throw LargeObjectException"); + } catch (LargeObjectException tooBig) { + // Expected result. + } + + try { + ldr.getCachedBytes(Integer.MAX_VALUE); + fail("Did not throw LargeObjectException"); + } catch (LargeObjectException tooBig) { + // Expected result. 
+ } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java index b8d7f3719..661a5e8eb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java @@ -53,6 +53,7 @@ import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.util.IO; /** * Base class for a set of loaders for different representations of Git objects. @@ -104,10 +105,42 @@ public boolean isLarge() { * {@link #openStream()} instead to access the contents. */ public final byte[] getBytes() throws LargeObjectException { - final byte[] data = getCachedBytes(); - final byte[] copy = new byte[data.length]; - System.arraycopy(data, 0, copy, 0, data.length); - return copy; + return cloneArray(getCachedBytes()); + } + + /** + * Obtain a copy of the bytes of this object. + * + * If the object size is less than or equal to {@code sizeLimit} this method + * will provide it as a byte array, even if {@link #isLarge()} is true. This + * utility is useful for application code that absolutely must have the + * object as a single contiguous byte array in memory. + * + * Unlike {@link #getCachedBytes(int)} this method returns an array that + * might be modified by the caller. + * + * @param sizeLimit + * maximum number of bytes to return. If the object size is + * larger than this limit and {@link #isLarge()} is true, {@link LargeObjectException} will be thrown. + * @return the bytes of this object. + * @throws LargeObjectException + * if the object is bigger than {@code sizeLimit}, or if + * {@link OutOfMemoryError} occurs during allocation of the + * result array. Callers should use {@link #openStream()} + * instead to access the contents. + * @throws MissingObjectException + * the object is large, and it no longer exists. + * @throws IOException + * the object store cannot be accessed. 
+ */ + public final byte[] getBytes(int sizeLimit) throws LargeObjectException, + MissingObjectException, IOException { + byte[] cached = getCachedBytes(sizeLimit); + try { + return cloneArray(cached); + } catch (OutOfMemoryError tooBig) { + throw new LargeObjectException(); + } } /** @@ -126,6 +159,59 @@ public final byte[] getBytes() throws LargeObjectException { */ public abstract byte[] getCachedBytes() throws LargeObjectException; + /** + * Obtain a reference to the (possibly cached) bytes of this object. + * + * If the object size is less than or equal to {@code sizeLimit} this method + * will provide it as a byte array, even if {@link #isLarge()} is true. This + * utility is useful for application code that absolutely must have the + * object as a single contiguous byte array in memory. + * + * This method offers direct access to the internal caches, potentially + * saving on data copies between the internal cache and higher level code. + * Callers who receive this reference must not modify its contents. + * Changes (if made) will affect the cache but not the repository itself. + * + * @param sizeLimit + * maximum number of bytes to return. If the object size is + * larger than this limit and {@link #isLarge()} is true, + * {@link LargeObjectException} will be thrown. + * @return the cached bytes of this object. Do not modify it. + * @throws LargeObjectException + * if the object is bigger than {@code sizeLimit}, or if + * {@link OutOfMemoryError} occurs during allocation of the + * result array. Callers should use {@link #openStream()} + * instead to access the contents. + * @throws MissingObjectException + * the object is large, and it no longer exists. + * @throws IOException + * the object store cannot be accessed. 
+ */ + public byte[] getCachedBytes(int sizeLimit) throws LargeObjectException, + MissingObjectException, IOException { + if (!isLarge()) + return getCachedBytes(); + + ObjectStream in = openStream(); + try { + long sz = in.getSize(); + if (sizeLimit < sz || Integer.MAX_VALUE < sz) + throw new LargeObjectException(); + + byte[] buf; + try { + buf = new byte[(int) sz]; + } catch (OutOfMemoryError notEnoughHeap) { + throw new LargeObjectException(); + } + + IO.readFully(in, buf, 0, buf.length); + return buf; + } finally { + in.close(); + } + } + /** * Obtain an input stream to read this object's data. * @@ -186,6 +272,12 @@ public void copyTo(OutputStream out) throws MissingObjectException, } } + private static byte[] cloneArray(final byte[] data) { + final byte[] copy = new byte[data.length]; + System.arraycopy(data, 0, copy, 0, data.length); + return copy; + } + /** * Simple loader around the cached byte array. *

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java index 86d66439d..4b3fe6af5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java @@ -77,8 +77,20 @@ public static class SmallStream extends ObjectStream { * the loader. */ public SmallStream(ObjectLoader loader) { - this.type = loader.getType(); - this.data = loader.getCachedBytes(); + this(loader.getType(), loader.getCachedBytes()); + } + + /** + * Create the stream from an existing byte array and type. + * + * @param type + * the type constant for the object. + * @param data + * the fully inflated content of the object. + */ + public SmallStream(int type, byte[] data) { + this.type = type; + this.data = data; } @Override