Add openStream to ObjectLoader for big blobs

Blobs that are too large to read as a single byte array should be
accessed through an InputStream based interface instead, allowing
the application to walk through the data stream incrementally.

Define the basic interface to support streaming contents, but don't
implement it yet for the file based backend.

Change-Id: If9e4442e9ef4ed52c3e0f1af9398199a73145516
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2010-06-30 18:36:10 -07:00
parent a1d5f5b6b5
commit 23e7f6376a
5 changed files with 318 additions and 2 deletions

View File

@ -0,0 +1,67 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.errors;
import org.eclipse.jgit.lib.ObjectId;
/**
 * Signals that an object cannot be held in memory as one single byte array
 * because it is too big.
 */
public class LargeObjectException extends RuntimeException {
	private static final long serialVersionUID = 1L;

	/**
	 * Construct the exception without naming the offending object.
	 * <p>
	 * No detail message is set.
	 */
	public LargeObjectException() {
		super();
	}

	/**
	 * Construct the exception for a specific, known object.
	 *
	 * @param id
	 *            identity of the object that is too large to be loaded as a
	 *            byte array in this JVM. Its name becomes the detail
	 *            message of the exception.
	 */
	public LargeObjectException(ObjectId id) {
		super(id.name());
	}
}

View File

@ -47,12 +47,20 @@
package org.eclipse.jgit.lib;
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
/**
* Base class for a set of loaders for different representations of Git objects.
* New loaders are constructed for every object.
*/
public abstract class ObjectLoader {
/**
 * Threshold compared against {@link #getSize()}: at or above this value the
 * default {@link #isLarge()} implementation returns true.
 */
private static final int LARGE_OBJECT = 1024 * 1024;
/**
* @return Git in pack object type, see {@link Constants}.
*/
@ -63,6 +71,15 @@ public abstract class ObjectLoader {
*/
public abstract long getSize();
/**
 * Test whether this object should be read as a stream rather than as a
 * byte array.
 *
 * @return true if the size of this object has reached the large object
 *         threshold. Such objects should be accessed only through
 *         {@link #openStream()} to prevent overflowing the JVM heap.
 */
public boolean isLarge() {
	return getSize() >= LARGE_OBJECT;
}
/**
* Obtain a copy of the bytes of this object.
* <p>
@ -70,8 +87,12 @@ public abstract class ObjectLoader {
* be modified by the caller.
*
* @return the bytes of this object.
* @throws LargeObjectException
* if the object won't fit into a byte array, because
* {@link #isLarge()} returns true. Callers should use
* {@link #openStream()} instead to access the contents.
*/
public final byte[] getBytes() {
public final byte[] getBytes() throws LargeObjectException {
final byte[] data = getCachedBytes();
final byte[] copy = new byte[data.length];
System.arraycopy(data, 0, copy, 0, data.length);
@ -87,6 +108,69 @@ public final byte[] getBytes() {
* Changes (if made) will affect the cache but not the repository itself.
*
* @return the cached bytes of this object. Do not modify it.
* @throws LargeObjectException
* if the object won't fit into a byte array, because
* {@link #isLarge()} returns true. Callers should use
* {@link #openStream()} instead to access the contents.
*/
public abstract byte[] getCachedBytes();
public abstract byte[] getCachedBytes() throws LargeObjectException;
/**
 * Obtain an input stream to read this object's data.
 * <p>
 * This is the access path recommended for objects whose {@link #isLarge()}
 * returns true, as their content should not be requested as a byte array.
 *
 * @return a stream of this object's data. Caller must close the stream when
 *         through with it. The returned stream is buffered with a
 *         reasonable buffer size.
 * @throws MissingObjectException
 *             the object no longer exists.
 * @throws IOException
 *             the object store cannot be accessed.
 */
public abstract ObjectStream openStream() throws MissingObjectException,
IOException;
/**
 * Write the complete content of this object to an output stream.
 * <p>
 * Some object store implementations may be able to do this more
 * efficiently than a caller reading from {@link #openStream()} into a
 * temporary byte array and then writing to the destination stream.
 * <p>
 * The default implementation writes the cached byte array directly when
 * the object is small. When {@link #isLarge()} is true it instead pumps the
 * object's stream through a temporary buffer, and verifies that the number
 * of bytes copied matches {@link #getSize()}.
 *
 * @param out
 *            stream to receive the complete copy of this object's data.
 *            Caller is responsible for flushing or closing this stream
 *            after this method returns.
 * @throws MissingObjectException
 *             the object no longer exists.
 * @throws IOException
 *             the object store cannot be accessed, or the stream cannot be
 *             written to. An {@link EOFException} is thrown if the number
 *             of bytes copied from a large object differs from its size.
 */
public void copyTo(OutputStream out) throws MissingObjectException,
		IOException {
	// Small objects: hand the cached array straight to the destination.
	if (!isLarge()) {
		out.write(getCachedBytes());
		return;
	}

	final ObjectStream src = openStream();
	try {
		final byte[] buf = new byte[1024];
		long total = 0;
		int cnt;
		while ((cnt = src.read(buf)) >= 0) {
			out.write(buf, 0, cnt);
			total += cnt;
		}
		// The stream ended; make sure it delivered exactly the object.
		if (total != getSize())
			throw new EOFException();
	} finally {
		src.close();
	}
}
}

View File

@ -0,0 +1,137 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.lib;
import java.io.InputStream;
/**
 * Readable view of a single object's content, as handed out by an
 * {@link ObjectLoader}.
 */
public abstract class ObjectStream extends InputStream {
	/** @return type of the Git object, see {@link Constants}. */
	public abstract int getType();

	/** @return length of the complete object, counted in bytes. */
	public abstract long getSize();

	/**
	 * In-memory stream backed by the cached byte array of a loader.
	 * <p>
	 * Intended for ObjectLoader implementations whose content is small
	 * enough to live in a single byte array, but whose caller nevertheless
	 * asked for the content in stream form.
	 */
	public static class SmallStream extends ObjectStream {
		private final int objectType;

		private final byte[] content;

		/** Index into {@code content} of the next byte to hand out. */
		private int position;

		/** Position remembered by the most recent {@link #mark(int)}. */
		private int markedPosition;

		/**
		 * Wrap the cached bytes of an existing loader.
		 *
		 * @param loader
		 *            the loader supplying the object type and the cached
		 *            byte array.
		 */
		public SmallStream(ObjectLoader loader) {
			objectType = loader.getType();
			content = loader.getCachedBytes();
		}

		@Override
		public int getType() {
			return objectType;
		}

		@Override
		public long getSize() {
			return content.length;
		}

		@Override
		public int available() {
			return content.length - position;
		}

		@Override
		public long skip(long n) {
			// Negative requests skip nothing; never move past the end.
			final long requested = Math.max(0, n);
			final int skipped = (int) Math.min(available(), requested);
			position += skipped;
			return skipped;
		}

		@Override
		public int read() {
			return position == content.length ? -1
					: content[position++] & 0xff;
		}

		@Override
		public int read(byte[] b, int off, int len) {
			if (position == content.length)
				return -1;
			final int count = Math.min(available(), len);
			System.arraycopy(content, position, b, off, count);
			position += count;
			return count;
		}

		@Override
		public boolean markSupported() {
			return true;
		}

		@Override
		public void mark(int readlimit) {
			// The read limit is irrelevant, all data is already in memory.
			markedPosition = position;
		}

		@Override
		public void reset() {
			position = markedPosition;
		}
	}
}

View File

@ -46,7 +46,11 @@
package org.eclipse.jgit.storage.file;
import java.io.IOException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectStream;
/** Object loaded in from a {@link PackFile}. */
final class PackedObjectLoader extends ObjectLoader {
@ -71,4 +75,15 @@ public final long getSize() {
public final byte[] getCachedBytes() {
// Hands back the data reference as-is; no copy is made here.
return data;
}
/**
 * @return always false; this loader never reports its object as large,
 *         and {@link #openStream()} serves the content from the cached
 *         byte array.
 */
@Override
public final boolean isLarge() {
return false;
}
/**
 * Open a stream over this loader's content.
 *
 * @return a new {@link ObjectStream.SmallStream} wrapping this loader; the
 *         stream reads from the byte array returned by
 *         {@link #getCachedBytes()}.
 */
@Override
public final ObjectStream openStream() throws MissingObjectException,
IOException {
return new ObjectStream.SmallStream(this);
}
}

View File

@ -52,10 +52,12 @@
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.InflaterCache;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.MutableInteger;
import org.eclipse.jgit.util.RawParseUtils;
@ -214,4 +216,15 @@ public long getSize() {
public byte[] getCachedBytes() {
// Hands back the bytes reference as-is; no copy is made here.
return bytes;
}
/**
 * @return always false; this loader never reports its object as large,
 *         and {@link #openStream()} serves the content from the cached
 *         byte array.
 */
@Override
public final boolean isLarge() {
return false;
}
/**
 * Open a stream over this loader's content.
 *
 * @return a new {@link ObjectStream.SmallStream} wrapping this loader; the
 *         stream reads from the byte array returned by
 *         {@link #getCachedBytes()}.
 */
@Override
public final ObjectStream openStream() throws MissingObjectException,
IOException {
return new ObjectStream.SmallStream(this);
}
}