From 23e7f6376ada54b1be232b15366d850d3038b95f Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 30 Jun 2010 18:36:10 -0700 Subject: [PATCH] Add openStream to ObjectLoader for big blobs Blobs that are too large to read as a single byte array should be accessed through an InputStream based interface instead, allowing the application to walk through the data stream incrementally. Define the basic interface to support streaming contents, but don't implement it yet for the file based backend. Change-Id: If9e4442e9ef4ed52c3e0f1af9398199a73145516 Signed-off-by: Shawn O. Pearce --- .../jgit/errors/LargeObjectException.java | 67 +++++++++ .../org/eclipse/jgit/lib/ObjectLoader.java | 88 ++++++++++- .../org/eclipse/jgit/lib/ObjectStream.java | 137 ++++++++++++++++++ .../jgit/storage/file/PackedObjectLoader.java | 15 ++ .../storage/file/UnpackedObjectLoader.java | 13 ++ 5 files changed, 318 insertions(+), 2 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java b/org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java new file mode 100644 index 000000000..d897c51de --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.errors; + +import org.eclipse.jgit.lib.ObjectId; + +/** An object is too big to load into memory as a single byte array. */ +public class LargeObjectException extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** Create a large object exception, where the object isn't known. */ + public LargeObjectException() { + // Do nothing. 
+ } + + /** + * Create a large object exception, naming the object that is too big. + * + * @param id + * identity of the object that is too big to be loaded as a byte + * array in this JVM. + */ + public LargeObjectException(ObjectId id) { + super(id.name()); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java index 1a8d1ba9b..e7be11a13 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java @@ -47,12 +47,20 @@ package org.eclipse.jgit.lib; +import java.io.EOFException; +import java.io.IOException; +import java.io.OutputStream; + +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; /** * Base class for a set of loaders for different representations of Git objects. * New loaders are constructed for every object. */ public abstract class ObjectLoader { + private static final int LARGE_OBJECT = 1024 * 1024; + /** * @return Git in pack object type, see {@link Constants}. */ @@ -63,6 +71,15 @@ public abstract class ObjectLoader { */ public abstract long getSize(); + /** + * @return true if this object is too large to obtain as a byte array. + * Objects over a certain threshold should be accessed only by their + * {@link #openStream()} to prevent overflowing the JVM heap. + */ + public boolean isLarge() { + return LARGE_OBJECT <= getSize(); + } + /** * Obtain a copy of the bytes of this object. *
<p>
@@ -70,8 +87,12 @@ public abstract class ObjectLoader { * be modified by the caller. * * @return the bytes of this object. + * @throws LargeObjectException + * if the object won't fit into a byte array, because + * {@link #isLarge()} returns true. Callers should use + * {@link #openStream()} instead to access the contents. */ - public final byte[] getBytes() { + public final byte[] getBytes() throws LargeObjectException { final byte[] data = getCachedBytes(); final byte[] copy = new byte[data.length]; System.arraycopy(data, 0, copy, 0, data.length); @@ -87,6 +108,69 @@ public final byte[] getBytes() { * Changes (if made) will affect the cache but not the repository itself. * * @return the cached bytes of this object. Do not modify it. + * @throws LargeObjectException + * if the object won't fit into a byte array, because + * {@link #isLarge()} returns true. Callers should use + * {@link #openStream()} instead to access the contents. */ - public abstract byte[] getCachedBytes(); + public abstract byte[] getCachedBytes() throws LargeObjectException; + + /** + * Obtain an input stream to read this object's data. + * + * @return a stream of this object's data. Caller must close the stream when + * through with it. The returned stream is buffered with a + * reasonable buffer size. + * @throws MissingObjectException + * the object no longer exists. + * @throws IOException + * the object store cannot be accessed. + */ + public abstract ObjectStream openStream() throws MissingObjectException, + IOException; + + /** + * Copy this object to the output stream. + *
<p>
+ * For some object store implementations, this method may be more efficient + * than reading from {@link #openStream()} into a temporary byte array, then + * writing to the destination stream. + *
<p>
+ * The default implementation of this method is to copy with a temporary + * byte array for large objects, or to pass through the cached byte array + * for small objects. + * + * @param out + * stream to receive the complete copy of this object's data. + * Caller is responsible for flushing or closing this stream + * after this method returns. + * @throws MissingObjectException + * the object no longer exists. + * @throws IOException + * the object store cannot be accessed, or the stream cannot be + * written to. + */ + public void copyTo(OutputStream out) throws MissingObjectException, + IOException { + if (isLarge()) { + ObjectStream in = openStream(); + try { + byte[] tmp = new byte[1024]; + long copied = 0; + for (;;) { + int n = in.read(tmp); + if (n < 0) + break; + out.write(tmp, 0, n); + copied += n; + } + if (copied != getSize()) + throw new EOFException(); + } finally { + in.close(); + } + } else { + out.write(getCachedBytes()); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java new file mode 100644 index 000000000..ec2e8f099 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.lib; + +import java.io.InputStream; + +/** Stream of data coming from an object loaded by {@link ObjectLoader}. */ +public abstract class ObjectStream extends InputStream { + /** @return Git object type, see {@link Constants}. */ + public abstract int getType(); + + /** @return total size of object in bytes */ + public abstract long getSize(); + + /** + * Simple stream around the cached byte array created by a loader. + *
<p>
+ * ObjectLoader implementations can use this stream type when the object's + * content is small enough to be accessed as a single byte array, but the + * application has still requested it in stream format. + */ + public static class SmallStream extends ObjectStream { + private final int type; + + private final byte[] data; + + private int ptr; + + private int mark; + + /** + * Create the stream from an existing loader's cached bytes. + * + * @param loader + * the loader. + */ + public SmallStream(ObjectLoader loader) { + this.type = loader.getType(); + this.data = loader.getCachedBytes(); + } + + @Override + public int getType() { + return type; + } + + @Override + public long getSize() { + return data.length; + } + + @Override + public int available() { + return data.length - ptr; + } + + @Override + public long skip(long n) { + int s = (int) Math.min(available(), Math.max(0, n)); + ptr += s; + return s; + } + + @Override + public int read() { + if (ptr == data.length) + return -1; + return data[ptr++] & 0xff; + } + + @Override + public int read(byte[] b, int off, int len) { + if (ptr == data.length) + return -1; + int n = Math.min(available(), len); + System.arraycopy(data, ptr, b, off, n); + ptr += n; + return n; + } + + @Override + public boolean markSupported() { + return true; + } + + @Override + public void mark(int readlimit) { + mark = ptr; + } + + @Override + public void reset() { + ptr = mark; + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java index ad4042e17..f056c7413 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java @@ -46,7 +46,11 @@ package org.eclipse.jgit.storage.file; +import java.io.IOException; + +import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.ObjectLoader; +import 
org.eclipse.jgit.lib.ObjectStream; /** Object loaded in from a {@link PackFile}. */ final class PackedObjectLoader extends ObjectLoader { @@ -71,4 +75,15 @@ public final long getSize() { public final byte[] getCachedBytes() { return data; } + + @Override + public final boolean isLarge() { + return false; + } + + @Override + public final ObjectStream openStream() throws MissingObjectException, + IOException { + return new ObjectStream.SmallStream(this); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java index b85ec149e..4a70793d0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java @@ -52,10 +52,12 @@ import org.eclipse.jgit.JGitText; import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.InflaterCache; import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectStream; import org.eclipse.jgit.util.IO; import org.eclipse.jgit.util.MutableInteger; import org.eclipse.jgit.util.RawParseUtils; @@ -214,4 +216,15 @@ public long getSize() { public byte[] getCachedBytes() { return bytes; } + + @Override + public final boolean isLarge() { + return false; + } + + @Override + public final ObjectStream openStream() throws MissingObjectException, + IOException { + return new ObjectStream.SmallStream(this); + } }