Increase core.streamFileThreshold default to 50 MiB

Projects like org.eclipse.mdt contain large XML files about 6 MiB
in size.  So does the Android project platform/frameworks/base.
Doing a clone of either project with JGit takes forever to checkout
the files into the working directory, because delta decompression
tends to be very expensive as we need to constantly reposition the
base stream for each copy instruction.  This can be made worse by
a very bad ordering of offsets, possibly due to an XML editor that
doesn't preserve the order of elements in the file very well.

Increasing the threshold to the same limit PackWriter uses when
doing delta compression (50 MiB) permits a default configured
JGit to decompress these XML file objects using the faster
random-access arrays, rather than re-seeking through an inflate
stream, significantly reducing checkout time after a clone.

Since this new limit may be dangerously close to the JVM maximum
heap size, every allocation attempt is now wrapped in a try/catch
so that JGit can degrade by switching to the large object stream
mode when the allocation is refused.  It will run slower, but the
operation will still complete.

The large stream mode will run very well for big objects that aren't
delta compressed, and is acceptable for delta compressed objects that
are using only forward referencing copy instructions.  Copies using
prior offsets are still going to be horrible, and there is nothing
we can do about it except increase core.streamFileThreshold.

We might in the future want to consider changing the way the delta
generators work in JGit and native C Git to avoid prior offsets once
an object reaches a certain size, even if that causes the delta
instruction stream to be slightly larger.  Unfortunately native
C Git won't want to do that until its also able to stream objects
rather than malloc them as contiguous blocks.

Change-Id: Ief7a3896afce15073e80d3691bed90c6a3897307
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
This commit is contained in:
Shawn O. Pearce 2010-09-16 17:02:27 -07:00 committed by Chris Aniszczyk
parent b0bfa8044a
commit 7ba31474a3
8 changed files with 105 additions and 109 deletions

View File

@ -70,6 +70,8 @@
import org.eclipse.jgit.util.TemporaryBuffer; import org.eclipse.jgit.util.TemporaryBuffer;
public class PackFileTest extends LocalDiskRepositoryTestCase { public class PackFileTest extends LocalDiskRepositoryTestCase {
private int streamThreshold = 16 * 1024;
private TestRng rng; private TestRng rng;
private FileRepository repo; private FileRepository repo;
@ -80,6 +82,11 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
WindowCacheConfig cfg = new WindowCacheConfig();
cfg.setStreamFileThreshold(streamThreshold);
WindowCache.reconfigure(cfg);
rng = new TestRng(getName()); rng = new TestRng(getName());
repo = createBareRepository(); repo = createBareRepository();
tr = new TestRepository<FileRepository>(repo); tr = new TestRepository<FileRepository>(repo);
@ -89,6 +96,7 @@ protected void setUp() throws Exception {
protected void tearDown() throws Exception { protected void tearDown() throws Exception {
if (wc != null) if (wc != null)
wc.release(); wc.release();
WindowCache.reconfigure(new WindowCacheConfig());
super.tearDown(); super.tearDown();
} }
@ -120,7 +128,7 @@ public void testWhole_SmallObject() throws Exception {
public void testWhole_LargeObject() throws Exception { public void testWhole_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB; final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); byte[] data = rng.nextBytes(streamThreshold + 5);
RevBlob id = tr.blob(data); RevBlob id = tr.blob(data);
tr.branch("master").commit().add("A", id).create(); tr.branch("master").commit().add("A", id).create();
tr.packAndPrune(); tr.packAndPrune();
@ -213,7 +221,7 @@ public void testDelta_SmallObjectChain() throws Exception {
public void testDelta_LargeObjectChain() throws Exception { public void testDelta_LargeObjectChain() throws Exception {
ObjectInserter.Formatter fmt = new ObjectInserter.Formatter(); ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
byte[] data0 = new byte[ObjectLoader.STREAM_THRESHOLD + 5]; byte[] data0 = new byte[streamThreshold + 5];
Arrays.fill(data0, (byte) 0xf3); Arrays.fill(data0, (byte) 0xf3);
ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0); ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
@ -277,64 +285,6 @@ public void testDelta_LargeObjectChain() throws Exception {
in.close(); in.close();
} }
public void testDelta_LargeInstructionStream() throws Exception {
ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
byte[] data0 = new byte[32];
Arrays.fill(data0, (byte) 0xf3);
ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
byte[] data3 = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
ByteArrayOutputStream tmp = new ByteArrayOutputStream();
DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length);
de.insert(data3, 0, data3.length);
byte[] delta3 = tmp.toByteArray();
assertTrue(delta3.length > ObjectLoader.STREAM_THRESHOLD);
TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(
ObjectLoader.STREAM_THRESHOLD + 1024);
packHeader(pack, 2);
objectHeader(pack, Constants.OBJ_BLOB, data0.length);
deflate(pack, data0);
ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
id0.copyRawTo(pack);
deflate(pack, delta3);
digest(pack);
final byte[] raw = pack.toByteArray();
IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
ip.setFixThin(true);
ip.index(NullProgressMonitor.INSTANCE);
ip.renameAndOpenPack();
assertTrue("has blob", wc.has(id3));
ObjectLoader ol = wc.open(id3);
assertNotNull("created loader", ol);
assertEquals(Constants.OBJ_BLOB, ol.getType());
assertEquals(data3.length, ol.getSize());
assertTrue("is large", ol.isLarge());
try {
ol.getCachedBytes();
fail("Should have thrown LargeObjectException");
} catch (LargeObjectException tooBig) {
assertEquals(MessageFormat.format(
JGitText.get().largeObjectException, id3.name()), tooBig
.getMessage());
}
ObjectStream in = ol.openStream();
assertNotNull("have stream", in);
assertEquals(Constants.OBJ_BLOB, in.getType());
assertEquals(data3.length, in.getSize());
byte[] act = new byte[data3.length];
IO.readFully(in, act, 0, data3.length);
assertTrue("same content", Arrays.equals(act, data3));
assertEquals("stream at EOF", -1, in.read());
in.close();
}
private byte[] clone(int first, byte[] base) { private byte[] clone(int first, byte[] base) {
byte[] r = new byte[base.length]; byte[] r = new byte[base.length];
System.arraycopy(base, 1, r, 1, r.length - 1); System.arraycopy(base, 1, r, 1, r.length - 1);

View File

@ -67,6 +67,8 @@
import org.eclipse.jgit.util.IO; import org.eclipse.jgit.util.IO;
public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
private int streamThreshold = 16 * 1024;
private TestRng rng; private TestRng rng;
private FileRepository repo; private FileRepository repo;
@ -75,6 +77,11 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
WindowCacheConfig cfg = new WindowCacheConfig();
cfg.setStreamFileThreshold(streamThreshold);
WindowCache.reconfigure(cfg);
rng = new TestRng(getName()); rng = new TestRng(getName());
repo = createBareRepository(); repo = createBareRepository();
wc = (WindowCursor) repo.newObjectReader(); wc = (WindowCursor) repo.newObjectReader();
@ -83,6 +90,7 @@ protected void setUp() throws Exception {
protected void tearDown() throws Exception { protected void tearDown() throws Exception {
if (wc != null) if (wc != null)
wc.release(); wc.release();
WindowCache.reconfigure(new WindowCacheConfig());
super.tearDown(); super.tearDown();
} }
@ -113,7 +121,7 @@ public void testStandardFormat_SmallObject() throws Exception {
public void testStandardFormat_LargeObject() throws Exception { public void testStandardFormat_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB; final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data); ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
write(id, compressStandardFormat(type, data)); write(id, compressStandardFormat(type, data));
@ -268,7 +276,7 @@ public void testStandardFormat_SmallObject_TrailingGarbage()
public void testStandardFormat_LargeObject_CorruptZLibStream() public void testStandardFormat_LargeObject_CorruptZLibStream()
throws Exception { throws Exception {
final int type = Constants.OBJ_BLOB; final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data); ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data); byte[] gz = compressStandardFormat(type, data);
gz[gz.length - 1] = 0; gz[gz.length - 1] = 0;
@ -305,7 +313,7 @@ public void testStandardFormat_LargeObject_CorruptZLibStream()
public void testStandardFormat_LargeObject_TruncatedZLibStream() public void testStandardFormat_LargeObject_TruncatedZLibStream()
throws Exception { throws Exception {
final int type = Constants.OBJ_BLOB; final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data); ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data); byte[] gz = compressStandardFormat(type, data);
byte[] tr = new byte[gz.length - 1]; byte[] tr = new byte[gz.length - 1];
@ -339,7 +347,7 @@ public void testStandardFormat_LargeObject_TruncatedZLibStream()
public void testStandardFormat_LargeObject_TrailingGarbage() public void testStandardFormat_LargeObject_TrailingGarbage()
throws Exception { throws Exception {
final int type = Constants.OBJ_BLOB; final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data); ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data); byte[] gz = compressStandardFormat(type, data);
byte[] tr = new byte[gz.length + 1]; byte[] tr = new byte[gz.length + 1];
@ -396,7 +404,7 @@ public void testPackFormat_SmallObject() throws Exception {
public void testPackFormat_LargeObject() throws Exception { public void testPackFormat_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB; final int type = Constants.OBJ_BLOB;
byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data); ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
write(id, compressPackFormat(type, data)); write(id, compressPackFormat(type, data));

View File

@ -335,6 +335,7 @@ repositoryState_rebaseOrApplyMailbox=Rebase/Apply mailbox
repositoryState_rebaseWithMerge=Rebase w/merge repositoryState_rebaseWithMerge=Rebase w/merge
requiredHashFunctionNotAvailable=Required hash function {0} not available. requiredHashFunctionNotAvailable=Required hash function {0} not available.
resolvingDeltas=Resolving deltas resolvingDeltas=Resolving deltas
resultLengthIncorrect=result length incorrect
searchForReuse=Finding sources searchForReuse=Finding sources
sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm. sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm.
serviceNotPermitted={0} not permitted serviceNotPermitted={0} not permitted

View File

@ -395,6 +395,7 @@ public static JGitText get() {
/***/ public String repositoryState_rebaseWithMerge; /***/ public String repositoryState_rebaseWithMerge;
/***/ public String requiredHashFunctionNotAvailable; /***/ public String requiredHashFunctionNotAvailable;
/***/ public String resolvingDeltas; /***/ public String resolvingDeltas;
/***/ public String resultLengthIncorrect;
/***/ public String searchForReuse; /***/ public String searchForReuse;
/***/ public String sequenceTooLargeForDiffAlgorithm; /***/ public String sequenceTooLargeForDiffAlgorithm;
/***/ public String serviceNotPermitted; /***/ public String serviceNotPermitted;

View File

@ -60,14 +60,6 @@
* New loaders are constructed for every object. * New loaders are constructed for every object.
*/ */
public abstract class ObjectLoader { public abstract class ObjectLoader {
/**
* Default setting for the large object threshold.
* <p>
* Objects larger than this size must be accessed as a stream through the
* loader's {@link #openStream()} method.
*/
public static final int STREAM_THRESHOLD = 5 * 1024 * 1024;
/** /**
* @return Git in pack object type, see {@link Constants}. * @return Git in pack object type, see {@link Constants}.
*/ */

View File

@ -63,6 +63,7 @@
import org.eclipse.jgit.JGitText; import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.PackInvalidException; import org.eclipse.jgit.errors.PackInvalidException;
import org.eclipse.jgit.errors.PackMismatchException; import org.eclipse.jgit.errors.PackMismatchException;
@ -274,12 +275,11 @@ ObjectId findObjectForOffset(final long offset) throws IOException {
return getReverseIdx().findObject(offset); return getReverseIdx().findObject(offset);
} }
private final byte[] decompress(final long position, final long totalSize, private final void decompress(final long position, final WindowCursor curs,
final WindowCursor curs) throws IOException, DataFormatException { final byte[] dstbuf, final int dstoff, final int dstsz)
final byte[] dstbuf = new byte[(int) totalSize]; throws IOException, DataFormatException {
if (curs.inflate(this, position, dstbuf, 0) != totalSize) if (curs.inflate(this, position, dstbuf, dstoff) != dstsz)
throw new EOFException(MessageFormat.format(JGitText.get().shortCompressedStreamAt, position)); throw new EOFException(MessageFormat.format(JGitText.get().shortCompressedStreamAt, position));
return dstbuf;
} }
final void copyAsIs(PackOutputStream out, LocalObjectToPack src, final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
@ -630,10 +630,16 @@ ObjectLoader load(final WindowCursor curs, final long pos)
case Constants.OBJ_BLOB: case Constants.OBJ_BLOB:
case Constants.OBJ_TAG: { case Constants.OBJ_TAG: {
if (sz < curs.getStreamFileThreshold()) { if (sz < curs.getStreamFileThreshold()) {
byte[] data = decompress(pos + p, sz, curs); byte[] data;
try {
data = new byte[(int) sz];
} catch (OutOfMemoryError tooBig) {
return largeWhole(curs, pos, type, sz, p);
}
decompress(pos + p, curs, data, 0, data.length);
return new ObjectLoader.SmallObject(type, data); return new ObjectLoader.SmallObject(type, data);
} }
return new LargePackedWholeObject(type, sz, pos, p, this, curs.db); return largeWhole(curs, pos, type, sz, p);
} }
case Constants.OBJ_OFS_DELTA: { case Constants.OBJ_OFS_DELTA: {
@ -680,44 +686,58 @@ private long findDeltaBase(ObjectId baseId) throws IOException,
private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz, private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz,
long posBase, WindowCursor curs) throws IOException, long posBase, WindowCursor curs) throws IOException,
DataFormatException { DataFormatException {
if (curs.getStreamFileThreshold() <= sz) { if (Integer.MAX_VALUE <= sz)
// The delta instruction stream itself is pretty big, and return largeDelta(posSelf, hdrLen, posBase, curs);
// that implies the resulting object is going to be massive.
// Use only the large delta format here.
//
return new LargePackedDeltaObject(posSelf, posBase, hdrLen, //
this, curs.db);
}
byte[] data; byte[] base;
int type; int type;
DeltaBaseCache.Entry e = DeltaBaseCache.get(this, posBase); DeltaBaseCache.Entry e = DeltaBaseCache.get(this, posBase);
if (e != null) { if (e != null) {
data = e.data; base = e.data;
type = e.type; type = e.type;
} else { } else {
ObjectLoader p = load(curs, posBase); ObjectLoader p = load(curs, posBase);
if (p.isLarge()) { try {
// The base itself is large. We have to produce a large base = p.getCachedBytes(curs.getStreamFileThreshold());
// delta stream as we don't want to build the whole base. } catch (LargeObjectException tooBig) {
// return largeDelta(posSelf, hdrLen, posBase, curs);
return new LargePackedDeltaObject(posSelf, posBase, hdrLen,
this, curs.db);
} }
data = p.getCachedBytes();
type = p.getType(); type = p.getType();
DeltaBaseCache.store(this, posBase, data, type); DeltaBaseCache.store(this, posBase, base, type);
} }
// At this point we have the base, and its small, and the delta final byte[] delta;
// stream also is small, so the result object cannot be more than try {
// 2x our small size. This occurs if the delta instructions were delta = new byte[(int) sz];
// "copy entire base, literal insert entire delta". Go with the } catch (OutOfMemoryError tooBig) {
// faster small object style at this point. return largeDelta(posSelf, hdrLen, posBase, curs);
// }
data = BinaryDelta.apply(data, decompress(posSelf + hdrLen, sz, curs));
return new ObjectLoader.SmallObject(type, data); decompress(posSelf + hdrLen, curs, delta, 0, delta.length);
sz = BinaryDelta.getResultSize(delta);
if (Integer.MAX_VALUE <= sz)
return largeDelta(posSelf, hdrLen, posBase, curs);
final byte[] result;
try {
result = new byte[(int) sz];
} catch (OutOfMemoryError tooBig) {
return largeDelta(posSelf, hdrLen, posBase, curs);
}
BinaryDelta.apply(base, delta, result);
return new ObjectLoader.SmallObject(type, result);
}
private LargePackedWholeObject largeWhole(final WindowCursor curs,
final long pos, final int type, long sz, int p) {
return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
}
private LargePackedDeltaObject largeDelta(long posObj, int hdrLen,
long posBase, WindowCursor wc) {
return new LargePackedDeltaObject(posObj, posBase, hdrLen, this, wc.db);
} }
byte[] getDeltaHeader(WindowCursor wc, long pos) byte[] getDeltaHeader(WindowCursor wc, long pos)

View File

@ -44,7 +44,7 @@
package org.eclipse.jgit.storage.file; package org.eclipse.jgit.storage.file;
import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.storage.pack.PackConfig;
/** Configuration parameters for {@link WindowCache}. */ /** Configuration parameters for {@link WindowCache}. */
public class WindowCacheConfig { public class WindowCacheConfig {
@ -73,7 +73,7 @@ public WindowCacheConfig() {
packedGitWindowSize = 8 * KB; packedGitWindowSize = 8 * KB;
packedGitMMAP = false; packedGitMMAP = false;
deltaBaseCacheLimit = 10 * MB; deltaBaseCacheLimit = 10 * MB;
streamFileThreshold = ObjectLoader.STREAM_THRESHOLD; streamFileThreshold = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
} }
/** /**

View File

@ -115,6 +115,25 @@ public static long getResultSize(final byte[] delta) {
* @return patched base * @return patched base
*/ */
public static final byte[] apply(final byte[] base, final byte[] delta) { public static final byte[] apply(final byte[] base, final byte[] delta) {
return apply(base, delta, null);
}
/**
* Apply the changes defined by delta to the data in base, yielding a new
* array of bytes.
*
* @param base
* some byte representing an object of some kind.
* @param delta
* a git pack delta defining the transform from one version to
* another.
* @param result
* array to store the result into. If null the result will be
* allocated and returned.
* @return either {@code result}, or the result array allocated.
*/
public static final byte[] apply(final byte[] base, final byte[] delta,
byte[] result) {
int deltaPtr = 0; int deltaPtr = 0;
// Length of the base object (a variable length int). // Length of the base object (a variable length int).
@ -140,7 +159,12 @@ public static final byte[] apply(final byte[] base, final byte[] delta) {
shift += 7; shift += 7;
} while ((c & 0x80) != 0); } while ((c & 0x80) != 0);
final byte[] result = new byte[resLen]; if (result == null)
result = new byte[resLen];
else if (result.length != resLen)
throw new IllegalArgumentException(
JGitText.get().resultLengthIncorrect);
int resultPtr = 0; int resultPtr = 0;
while (deltaPtr < delta.length) { while (deltaPtr < delta.length) {
final int cmd = delta[deltaPtr++] & 0xff; final int cmd = delta[deltaPtr++] & 0xff;