Increase core.streamFileThreshold default to 50 MiB

Projects like org.eclipse.mdt contain large XML files about 6 MiB
in size.  So does the Android project platform/frameworks/base.
Doing a clone of either project with JGit takes forever to check out
the files into the working directory, because delta decompression
tends to be very expensive as we need to constantly reposition the
base stream for each copy instruction.  This can be made worse by
a very bad ordering of offsets, possibly due to an XML editor that
doesn't preserve the order of elements in the file very well.

Increasing the threshold to the same limit PackWriter uses when
doing delta compression (50 MiB) permits a default configured
JGit to decompress these XML file objects using the faster
random-access arrays, rather than re-seeking through an inflate
stream, significantly reducing checkout time after a clone.

Since this new limit may be dangerously close to the JVM maximum
heap size, every allocation attempt is now wrapped in a try/catch
so that JGit can degrade by switching to the large object stream
mode when the allocation is refused.  It will run slower, but the
operation will still complete.

The large stream mode will run very well for big objects that aren't
delta compressed, and is acceptable for delta compressed objects that
are using only forward referencing copy instructions.  Copies using
prior offsets are still going to be horrible, and there is nothing
we can do about it except increase core.streamFileThreshold.

We might in the future want to consider changing the way the delta
generators work in JGit and native C Git to avoid prior offsets once
an object reaches a certain size, even if that causes the delta
instruction stream to be slightly larger.  Unfortunately native
C Git won't want to do that until it's also able to stream objects
rather than malloc them as contiguous blocks.

Change-Id: Ief7a3896afce15073e80d3691bed90c6a3897307
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
This commit is contained in:
Shawn O. Pearce 2010-09-16 17:02:27 -07:00 committed by Chris Aniszczyk
parent b0bfa8044a
commit 7ba31474a3
8 changed files with 105 additions and 109 deletions

View File

@ -70,6 +70,8 @@
import org.eclipse.jgit.util.TemporaryBuffer;
public class PackFileTest extends LocalDiskRepositoryTestCase {
private int streamThreshold = 16 * 1024;
private TestRng rng;
private FileRepository repo;
@ -80,6 +82,11 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
protected void setUp() throws Exception {
super.setUp();
WindowCacheConfig cfg = new WindowCacheConfig();
cfg.setStreamFileThreshold(streamThreshold);
WindowCache.reconfigure(cfg);
rng = new TestRng(getName());
repo = createBareRepository();
tr = new TestRepository<FileRepository>(repo);
@ -89,6 +96,7 @@ protected void setUp() throws Exception {
// Undo setUp(): free the cursor and put the (apparently JVM-wide,
// given the static reconfigure call) WindowCache back to its default
// configuration so the custom streamFileThreshold set in setUp()
// cannot leak into later tests.
protected void tearDown() throws Exception {
if (wc != null)
wc.release();
// Restore defaults before the superclass tears the repository down.
WindowCache.reconfigure(new WindowCacheConfig());
super.tearDown();
}
@ -120,7 +128,7 @@ public void testWhole_SmallObject() throws Exception {
public void testWhole_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
byte[] data = rng.nextBytes(streamThreshold + 5);
RevBlob id = tr.blob(data);
tr.branch("master").commit().add("A", id).create();
tr.packAndPrune();
@ -213,7 +221,7 @@ public void testDelta_SmallObjectChain() throws Exception {
public void testDelta_LargeObjectChain() throws Exception {
ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
byte[] data0 = new byte[ObjectLoader.STREAM_THRESHOLD + 5];
byte[] data0 = new byte[streamThreshold + 5];
Arrays.fill(data0, (byte) 0xf3);
ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
@ -277,64 +285,6 @@ public void testDelta_LargeObjectChain() throws Exception {
in.close();
}
// Verifies handling of a delta whose instruction stream alone exceeds
// STREAM_THRESHOLD: the loader must report the object as large, refuse
// getCachedBytes() with LargeObjectException, and still deliver the
// full content through openStream().
public void testDelta_LargeInstructionStream() throws Exception {
ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
// Tiny base object; the delta below ignores it and inserts all of
// its content as literals.
byte[] data0 = new byte[32];
Arrays.fill(data0, (byte) 0xf3);
ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
byte[] data3 = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
ByteArrayOutputStream tmp = new ByteArrayOutputStream();
DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length);
de.insert(data3, 0, data3.length);
byte[] delta3 = tmp.toByteArray();
// A pure literal-insert delta is bigger than the content it
// produces, so the instruction stream itself tops the threshold.
assertTrue(delta3.length > ObjectLoader.STREAM_THRESHOLD);
// Hand-build a 2-object pack: the base blob, then a REF_DELTA that
// names the base by its object id. Order of writes matters.
TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(
ObjectLoader.STREAM_THRESHOLD + 1024);
packHeader(pack, 2);
objectHeader(pack, Constants.OBJ_BLOB, data0.length);
deflate(pack, data0);
ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
id0.copyRawTo(pack);
deflate(pack, delta3);
digest(pack);
final byte[] raw = pack.toByteArray();
// Index the pack into the repository so the cursor can read it.
IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
ip.setFixThin(true);
ip.index(NullProgressMonitor.INSTANCE);
ip.renameAndOpenPack();
assertTrue("has blob", wc.has(id3));
ObjectLoader ol = wc.open(id3);
assertNotNull("created loader", ol);
assertEquals(Constants.OBJ_BLOB, ol.getType());
assertEquals(data3.length, ol.getSize());
assertTrue("is large", ol.isLarge());
// Cached (in-memory) access must be refused for large objects, with
// the object's name in the exception message.
try {
ol.getCachedBytes();
fail("Should have thrown LargeObjectException");
} catch (LargeObjectException tooBig) {
assertEquals(MessageFormat.format(
JGitText.get().largeObjectException, id3.name()), tooBig
.getMessage());
}
// Streaming access must still return the exact content.
ObjectStream in = ol.openStream();
assertNotNull("have stream", in);
assertEquals(Constants.OBJ_BLOB, in.getType());
assertEquals(data3.length, in.getSize());
byte[] act = new byte[data3.length];
IO.readFully(in, act, 0, data3.length);
assertTrue("same content", Arrays.equals(act, data3));
assertEquals("stream at EOF", -1, in.read());
in.close();
}
private byte[] clone(int first, byte[] base) {
byte[] r = new byte[base.length];
System.arraycopy(base, 1, r, 1, r.length - 1);

View File

@ -67,6 +67,8 @@
import org.eclipse.jgit.util.IO;
public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
private int streamThreshold = 16 * 1024;
private TestRng rng;
private FileRepository repo;
@ -75,6 +77,11 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
protected void setUp() throws Exception {
super.setUp();
WindowCacheConfig cfg = new WindowCacheConfig();
cfg.setStreamFileThreshold(streamThreshold);
WindowCache.reconfigure(cfg);
rng = new TestRng(getName());
repo = createBareRepository();
wc = (WindowCursor) repo.newObjectReader();
@ -83,6 +90,7 @@ protected void setUp() throws Exception {
// Undo setUp(): free the cursor and put the (apparently JVM-wide,
// given the static reconfigure call) WindowCache back to its default
// configuration so the custom streamFileThreshold set in setUp()
// cannot leak into later tests.
protected void tearDown() throws Exception {
if (wc != null)
wc.release();
// Restore defaults before the superclass tears the repository down.
WindowCache.reconfigure(new WindowCacheConfig());
super.tearDown();
}
@ -113,7 +121,7 @@ public void testStandardFormat_SmallObject() throws Exception {
public void testStandardFormat_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
write(id, compressStandardFormat(type, data));
@ -268,7 +276,7 @@ public void testStandardFormat_SmallObject_TrailingGarbage()
public void testStandardFormat_LargeObject_CorruptZLibStream()
throws Exception {
final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data);
gz[gz.length - 1] = 0;
@ -305,7 +313,7 @@ public void testStandardFormat_LargeObject_CorruptZLibStream()
public void testStandardFormat_LargeObject_TruncatedZLibStream()
throws Exception {
final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data);
byte[] tr = new byte[gz.length - 1];
@ -339,7 +347,7 @@ public void testStandardFormat_LargeObject_TruncatedZLibStream()
public void testStandardFormat_LargeObject_TrailingGarbage()
throws Exception {
final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data);
byte[] tr = new byte[gz.length + 1];
@ -396,7 +404,7 @@ public void testPackFormat_SmallObject() throws Exception {
public void testPackFormat_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
write(id, compressPackFormat(type, data));

View File

@ -335,6 +335,7 @@ repositoryState_rebaseOrApplyMailbox=Rebase/Apply mailbox
repositoryState_rebaseWithMerge=Rebase w/merge
requiredHashFunctionNotAvailable=Required hash function {0} not available.
resolvingDeltas=Resolving deltas
resultLengthIncorrect=result length incorrect
searchForReuse=Finding sources
sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm.
serviceNotPermitted={0} not permitted

View File

@ -395,6 +395,7 @@ public static JGitText get() {
/***/ public String repositoryState_rebaseWithMerge;
/***/ public String requiredHashFunctionNotAvailable;
/***/ public String resolvingDeltas;
/***/ public String resultLengthIncorrect;
/***/ public String searchForReuse;
/***/ public String sequenceTooLargeForDiffAlgorithm;
/***/ public String serviceNotPermitted;

View File

@ -60,14 +60,6 @@
* New loaders are constructed for every object.
*/
public abstract class ObjectLoader {
/**
* Default setting for the large object threshold.
* <p>
* Objects larger than this size must be accessed as a stream through the
* loader's {@link #openStream()} method.
*/
public static final int STREAM_THRESHOLD = 5 * 1024 * 1024;
/**
* @return Git in pack object type, see {@link Constants}.
*/

View File

@ -63,6 +63,7 @@
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.PackInvalidException;
import org.eclipse.jgit.errors.PackMismatchException;
@ -274,12 +275,11 @@ ObjectId findObjectForOffset(final long offset) throws IOException {
return getReverseIdx().findObject(offset);
}
private final byte[] decompress(final long position, final long totalSize,
final WindowCursor curs) throws IOException, DataFormatException {
final byte[] dstbuf = new byte[(int) totalSize];
if (curs.inflate(this, position, dstbuf, 0) != totalSize)
private final void decompress(final long position, final WindowCursor curs,
final byte[] dstbuf, final int dstoff, final int dstsz)
throws IOException, DataFormatException {
if (curs.inflate(this, position, dstbuf, dstoff) != dstsz)
throw new EOFException(MessageFormat.format(JGitText.get().shortCompressedStreamAt, position));
return dstbuf;
}
final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
@ -630,10 +630,16 @@ ObjectLoader load(final WindowCursor curs, final long pos)
case Constants.OBJ_BLOB:
case Constants.OBJ_TAG: {
if (sz < curs.getStreamFileThreshold()) {
byte[] data = decompress(pos + p, sz, curs);
byte[] data;
try {
data = new byte[(int) sz];
} catch (OutOfMemoryError tooBig) {
return largeWhole(curs, pos, type, sz, p);
}
decompress(pos + p, curs, data, 0, data.length);
return new ObjectLoader.SmallObject(type, data);
}
return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
return largeWhole(curs, pos, type, sz, p);
}
case Constants.OBJ_OFS_DELTA: {
@ -680,44 +686,58 @@ private long findDeltaBase(ObjectId baseId) throws IOException,
private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz,
long posBase, WindowCursor curs) throws IOException,
DataFormatException {
if (curs.getStreamFileThreshold() <= sz) {
// The delta instruction stream itself is pretty big, and
// that implies the resulting object is going to be massive.
// Use only the large delta format here.
//
return new LargePackedDeltaObject(posSelf, posBase, hdrLen, //
this, curs.db);
}
if (Integer.MAX_VALUE <= sz)
return largeDelta(posSelf, hdrLen, posBase, curs);
byte[] data;
byte[] base;
int type;
DeltaBaseCache.Entry e = DeltaBaseCache.get(this, posBase);
if (e != null) {
data = e.data;
base = e.data;
type = e.type;
} else {
ObjectLoader p = load(curs, posBase);
if (p.isLarge()) {
// The base itself is large. We have to produce a large
// delta stream as we don't want to build the whole base.
//
return new LargePackedDeltaObject(posSelf, posBase, hdrLen,
this, curs.db);
try {
base = p.getCachedBytes(curs.getStreamFileThreshold());
} catch (LargeObjectException tooBig) {
return largeDelta(posSelf, hdrLen, posBase, curs);
}
data = p.getCachedBytes();
type = p.getType();
DeltaBaseCache.store(this, posBase, data, type);
DeltaBaseCache.store(this, posBase, base, type);
}
// At this point we have the base, and its small, and the delta
// stream also is small, so the result object cannot be more than
// 2x our small size. This occurs if the delta instructions were
// "copy entire base, literal insert entire delta". Go with the
// faster small object style at this point.
//
data = BinaryDelta.apply(data, decompress(posSelf + hdrLen, sz, curs));
return new ObjectLoader.SmallObject(type, data);
final byte[] delta;
try {
delta = new byte[(int) sz];
} catch (OutOfMemoryError tooBig) {
return largeDelta(posSelf, hdrLen, posBase, curs);
}
decompress(posSelf + hdrLen, curs, delta, 0, delta.length);
sz = BinaryDelta.getResultSize(delta);
if (Integer.MAX_VALUE <= sz)
return largeDelta(posSelf, hdrLen, posBase, curs);
final byte[] result;
try {
result = new byte[(int) sz];
} catch (OutOfMemoryError tooBig) {
return largeDelta(posSelf, hdrLen, posBase, curs);
}
BinaryDelta.apply(base, delta, result);
return new ObjectLoader.SmallObject(type, result);
}
/**
 * Wrap a whole (non-delta) packed object in a streaming loader.
 * <p>
 * Used when the object's size reaches the stream file threshold, or
 * when allocating a byte array for it threw {@code OutOfMemoryError}.
 *
 * @param curs
 *            active window cursor; supplies the owning database.
 * @param pos
 *            position of the object's header within this pack file.
 * @param type
 *            Git object type, e.g. {@link Constants#OBJ_BLOB}.
 * @param sz
 *            size of the object, in bytes.
 * @param p
 *            length of the object header; data starts at {@code pos + p}.
 * @return loader that streams the object instead of buffering it.
 */
private LargePackedWholeObject largeWhole(final WindowCursor curs,
final long pos, final int type, long sz, int p) {
return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
}
/**
 * Wrap a delta-compressed packed object in a streaming loader.
 * <p>
 * Used when the delta, its base, or the inflated result cannot be
 * materialized as a byte array: the size exceeds the stream file
 * threshold or {@code Integer.MAX_VALUE}, or allocation failed.
 *
 * @param posObj
 *            position of the delta object's header in this pack file.
 * @param hdrLen
 *            length of the delta object's header.
 * @param posBase
 *            position of the delta base within this pack file.
 * @param wc
 *            active window cursor; supplies the owning database.
 * @return loader for the large delta-compressed object.
 */
private LargePackedDeltaObject largeDelta(long posObj, int hdrLen,
long posBase, WindowCursor wc) {
return new LargePackedDeltaObject(posObj, posBase, hdrLen, this, wc.db);
}
byte[] getDeltaHeader(WindowCursor wc, long pos)

View File

@ -44,7 +44,7 @@
package org.eclipse.jgit.storage.file;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.storage.pack.PackConfig;
/** Configuration parameters for {@link WindowCache}. */
public class WindowCacheConfig {
@ -73,7 +73,7 @@ public WindowCacheConfig() {
packedGitWindowSize = 8 * KB;
packedGitMMAP = false;
deltaBaseCacheLimit = 10 * MB;
streamFileThreshold = ObjectLoader.STREAM_THRESHOLD;
streamFileThreshold = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
}
/**

View File

@ -115,6 +115,25 @@ public static long getResultSize(final byte[] delta) {
* @return patched base
*/
public static final byte[] apply(final byte[] base, final byte[] delta) {
// Forward to the three-argument overload; passing null asks it
// to allocate the result buffer on our behalf.
final byte[] patched = apply(base, delta, null);
return patched;
}
/**
* Apply the changes defined by delta to the data in base, yielding a new
* array of bytes.
*
* @param base
* some byte representing an object of some kind.
* @param delta
* a git pack delta defining the transform from one version to
* another.
* @param result
* array to store the result into. If null the result will be
* allocated and returned.
* @return either {@code result}, or the result array allocated.
*/
public static final byte[] apply(final byte[] base, final byte[] delta,
byte[] result) {
int deltaPtr = 0;
// Length of the base object (a variable length int).
@ -140,7 +159,12 @@ public static final byte[] apply(final byte[] base, final byte[] delta) {
shift += 7;
} while ((c & 0x80) != 0);
final byte[] result = new byte[resLen];
if (result == null)
result = new byte[resLen];
else if (result.length != resLen)
throw new IllegalArgumentException(
JGitText.get().resultLengthIncorrect);
int resultPtr = 0;
while (deltaPtr < delta.length) {
final int cmd = delta[deltaPtr++] & 0xff;