Cache small deltas during packing

PackWriter now caches small deltas, or deltas that are very tiny
compared to their source inputs, so that the writing phase goes
faster by reusing those cached deltas.

The cached data is stored compressed, which usually translates to
a bigger footprint due to deltas being very hard to compress, but
saves time during writing by avoiding the deflate step.  They are
held under SoftReferences so that the JVM GC can clear out deltas
if memory gets very tight.  We would rather continue working and
spend a bit more CPU time during writing than crash due to OOME.

To avoid OutOfMemoryErrors during the caching phase we also trap
OOME and just abort out of the caching.

Because deflateBound() always produces something larger than what
we need to actually store the deflated data, we copy it over into
a new buffer if the actual length doesn't match the buffer length.
When packing jgit.git this saves over 111 KiB in the cache, and is
thus a worthwhile hit on CPU time.

To further save memory we store the inflated size of the delta
(which we need for the object header) in the same field as the
pathHash, as the pathHash is no longer necessary by this phase
of the packing algorithm.

Change-Id: I0da0c600d845e8ec962289751f24e65b5afa56d7
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2010-07-09 17:19:32 -07:00
parent dfad23bf3d
commit a960d1429e
5 changed files with 377 additions and 19 deletions

View File

@ -0,0 +1,129 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.pack;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
/**
 * Byte-budget accounting for deflated deltas held in memory.
 * <p>
 * Callers first reserve space with {@link #canCache}, then either hand the
 * compressed bytes to {@link #cache} or give the reservation back through
 * {@link #credit}. Cached arrays are wrapped in soft references registered
 * with an internal queue; references found on that queue have their cost
 * subtracted from the running total.
 */
class DeltaCache {
	/** Byte budget taken from the writer; the overflow test only applies when it is positive. */
	private final long size;

	/** Deltas strictly shorter than this are accepted without looking at object weights. */
	private final int entryLimit;

	/** Queue the soft references are registered with. */
	private final ReferenceQueue<byte[]> queue;

	/** Bytes currently reserved or cached. */
	private long used;

	DeltaCache(PackWriter pw) {
		size = pw.getDeltaCacheSize();
		entryLimit = pw.getDeltaCacheLimit();
		queue = new ReferenceQueue<byte[]>();
	}

	/**
	 * Decide whether a delta should be cached, reserving its space if so.
	 *
	 * @param length
	 *            size of the delta, in bytes.
	 * @param src
	 *            object the delta was computed against.
	 * @param res
	 *            object the delta produces.
	 * @return true if {@code length} bytes were reserved; false if the delta
	 *         should not be cached.
	 */
	boolean canCache(int length, ObjectToPack src, ObjectToPack res) {
		// Refuse anything that would push us past the budget, but only
		// after giving back whatever has landed on the reference queue.
		//
		if (wouldOverflow(length)) {
			checkForGarbageCollectedObjects();
			if (wouldOverflow(length))
				return false;
		}

		if (length < entryLimit || isTinyForItsInputs(length, src, res)) {
			used += length;
			return true;
		}
		return false;
	}

	/** @return true when a positive budget exists and adding {@code length} exceeds it. */
	private boolean wouldOverflow(int length) {
		return 0 < size && size < used + length;
	}

	/**
	 * If the combined source files are multiple megabytes but the delta is
	 * on the order of a kilobyte or two, this was likely costly to
	 * construct. Such a delta is cached even though it is over the limit.
	 */
	private static boolean isTinyForItsInputs(int length, ObjectToPack src,
			ObjectToPack res) {
		return length >> 10 < (src.getWeight() >> 20) + (res.getWeight() >> 21);
	}

	/**
	 * Return a reservation that will not be used.
	 *
	 * @param reservedSize
	 *            number of bytes previously reserved.
	 */
	void credit(int reservedSize) {
		used -= reservedSize;
	}

	/**
	 * Wrap compressed delta data in a cache reference.
	 *
	 * @param data
	 *            buffer whose first {@code actLen} bytes are the data.
	 * @param actLen
	 *            number of valid bytes in {@code data}.
	 * @param reservedSize
	 *            number of bytes reserved for this entry earlier.
	 * @return reference holding an array of exactly {@code actLen} bytes.
	 */
	Ref cache(byte[] data, int actLen, int reservedSize) {
		// The caller may have over-allocated; keep only what is needed.
		//
		byte[] exact = data;
		if (exact.length != actLen) {
			exact = new byte[actLen];
			System.arraycopy(data, 0, exact, 0, actLen);
		}

		// The reservation was made for the inflated size, but what we
		// store is the compressed form. Swap one cost for the other.
		//
		used += (long) exact.length - reservedSize;
		return new Ref(exact, queue);
	}

	/** Subtract the cost of every reference currently waiting on the queue. */
	private void checkForGarbageCollectedObjects() {
		for (Ref r = (Ref) queue.poll(); r != null; r = (Ref) queue.poll())
			used -= r.cost;
	}

	/** Soft reference to a cached array that remembers the array's length. */
	static class Ref extends SoftReference<byte[]> {
		/** Length of the referenced array, recorded at construction. */
		final int cost;

		Ref(byte[] array, ReferenceQueue<byte[]> queue) {
			super(array, queue);
			cost = array.length;
		}
	}
}

View File

@ -43,7 +43,10 @@
package org.eclipse.jgit.storage.pack;
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.Deflater;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
@ -59,6 +62,8 @@ class DeltaWindow {
private final PackWriter writer;
private final DeltaCache deltaCache;
private final ObjectReader reader;
private final DeltaWindowEntry[] window;
@ -87,8 +92,12 @@ class DeltaWindow {
/** If we have {@link #bestDelta}, the window position it was created by. */
private int bestSlot;
DeltaWindow(PackWriter pw, ObjectReader or) {
/** Used to compress cached deltas. */
private Deflater deflater;
DeltaWindow(PackWriter pw, DeltaCache dc, ObjectReader or) {
writer = pw;
deltaCache = dc;
reader = or;
// C Git increases the window size supplied by the user by 1.
@ -111,25 +120,31 @@ class DeltaWindow {
void search(ProgressMonitor monitor, ObjectToPack[] toSearch, int off,
int cnt) throws IOException {
for (int end = off + cnt; off < end; off++) {
monitor.update(1);
try {
for (int end = off + cnt; off < end; off++) {
monitor.update(1);
res = window[resSlot];
res.set(toSearch[off]);
res = window[resSlot];
res.set(toSearch[off]);
if (res.object.isDoNotDelta()) {
// PackWriter marked edge objects with the do-not-delta flag.
// They are the only ones that appear in toSearch with it set,
// but we don't actually want to make a delta for them, just
// need to push them into the window so they can be read by
// other objects coming through.
//
keepInWindow();
} else {
// Search for a delta for the current window slot.
//
search();
if (res.object.isDoNotDelta()) {
// PackWriter marked edge objects with the
// do-not-delta flag. They are the only ones
// that appear in toSearch with it set, but
// we don't actually want to make a delta for
// them, just need to push them into the window
// so they can be read by other objects.
//
keepInWindow();
} else {
// Search for a delta for the current window slot.
//
search();
}
}
} finally {
if (deflater != null)
deflater.end();
}
}
@ -184,6 +199,7 @@ private void search() throws IOException {
}
resObj.setDeltaDepth(srcObj.getDeltaDepth() + 1);
resObj.clearReuseAsIs();
cacheDelta(srcObj, resObj);
// Discard the cached best result, otherwise it leaks.
//
@ -275,6 +291,33 @@ private int delta(final DeltaWindowEntry src, final int srcSlot)
return NEXT_SRC;
}
/**
 * Try to keep a deflated copy of {@link #bestDelta} on the result object.
 * <p>
 * Nothing is stored when the delta does not fit in an {@code int}, when the
 * cache declines it, or when compressing it fails.
 *
 * @param srcObj
 *            object the delta was computed against.
 * @param resObj
 *            object that receives the cached delta and its inflated size.
 */
private void cacheDelta(ObjectToPack srcObj, ObjectToPack resObj) {
	final long deltaLength = bestDelta.length();
	if (deltaLength > Integer.MAX_VALUE)
		return;

	final int rawsz = (int) deltaLength;
	if (!deltaCache.canCache(rawsz, srcObj, resObj))
		return;

	try {
		final byte[] zbuf = new byte[deflateBound(rawsz)];
		final ZipStream zs = new ZipStream(deflater(), zbuf);
		bestDelta.writeTo(zs, null);
		final DeltaCache.Ref ref = deltaCache.cache(zbuf, zs.finish(), rawsz);
		resObj.setCachedDelta(ref);
		resObj.setCachedSize(rawsz);
	} catch (IOException err) {
		// Compression failed; give the reservation back and move on.
		deltaCache.credit(rawsz);
	} catch (OutOfMemoryError err) {
		// No room for the buffers; give the reservation back and move on.
		deltaCache.credit(rawsz);
	}
}
/**
 * Compute the output buffer size to allocate before deflating.
 *
 * @param insz
 *            number of input bytes.
 * @return {@code insz} plus one eighth and one sixty-fourth of it (each
 *         rounded up) plus 11.
 */
private static int deflateBound(int insz) {
	final int eighth = (insz + 7) >> 3;
	final int sixtyFourth = (insz + 63) >> 6;
	return insz + eighth + sixtyFourth + 11;
}
private void shuffleBaseUpInPriority() {
// Shuffle the entire window so that the best match we just used
// is at our current index, and our current object is at the index
@ -366,4 +409,63 @@ private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException,
ent.buffer = buf = writer.buffer(reader, ent.object);
return buf;
}
}
/**
 * Obtain the shared deflater, ready for a new stream.
 *
 * @return the deflater; created at the writer's compression level on first
 *         use, reset on every later call.
 */
private Deflater deflater() {
	if (deflater != null) {
		deflater.reset();
		return deflater;
	}
	deflater = new Deflater(writer.getCompressionLevel());
	return deflater;
}
/**
 * Output stream that deflates everything written to it into a fixed-size
 * byte array supplied by the caller.
 * <p>
 * The array is never grown. Running out of room is reported with an
 * {@link EOFException}.
 */
static final class ZipStream extends OutputStream {
	private final Deflater deflater;

	private final byte[] zbuf;

	/** Number of compressed bytes stored in {@link #zbuf} so far. */
	private int outPtr;

	/**
	 * @param deflater
	 *            compressor to feed; the caller keeps ownership.
	 * @param zbuf
	 *            destination for the compressed bytes.
	 */
	ZipStream(Deflater deflater, byte[] zbuf) {
		this.deflater = deflater;
		this.zbuf = zbuf;
	}

	/**
	 * Finish the compressed stream.
	 *
	 * @return total number of compressed bytes stored in the buffer.
	 * @throws EOFException
	 *             the buffer filled up before the stream was complete.
	 * @throws IOException
	 *             the deflater made no progress without finishing.
	 */
	int finish() throws IOException {
		deflater.finish();
		// Ask whether the deflater is done BEFORE looking at the remaining
		// space. A stream that fills the buffer exactly is a success, not
		// an overflow.
		while (!deflater.finished()) {
			if (outPtr == zbuf.length)
				throw new EOFException();

			int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr);
			if (n == 0 && !deflater.finished())
				throw new IOException();
			outPtr += n;
		}
		return outPtr;
	}

	/**
	 * Compress a range of bytes into the buffer.
	 *
	 * @throws EOFException
	 *             the buffer is full.
	 * @throws IOException
	 *             the deflater produced nothing yet still holds input.
	 */
	@Override
	public void write(byte[] b, int off, int len) throws IOException {
		deflater.setInput(b, off, len);
		for (;;) {
			if (outPtr == zbuf.length)
				throw new EOFException();

			int n = deflater.deflate(zbuf, outPtr, zbuf.length - outPtr);
			if (n == 0) {
				// No output and no input left: this chunk is consumed.
				if (deflater.needsInput())
					break;
				throw new IOException();
			}
			outPtr += n;
		}
	}

	/** Single byte writes are not supported; use the array form. */
	@Override
	public void write(int b) throws IOException {
		throw new UnsupportedOperationException();
	}
}
}

View File

@ -90,6 +90,9 @@ public class ObjectToPack extends PackedObjectInfo {
/** Hash of the object's tree path. */
private int pathHash;
/** If present, deflated delta instruction stream for this object. */
private DeltaCache.Ref cachedDelta;
/**
* Construct for the specified object id.
*
@ -150,8 +153,25 @@ void setDeltaBase(ObjectId deltaBase) {
this.deltaBase = deltaBase;
}
/**
 * Attach a cached delta to this object.
 *
 * @param data
 *            reference to store; replaces any reference already held.
 */
void setCachedDelta(DeltaCache.Ref data) {
	this.cachedDelta = data;
}
/**
 * Take the cached delta away from this object.
 *
 * @return the reference that was held, or null if there was none. After
 *         this call the object no longer holds a cached delta.
 */
DeltaCache.Ref popCachedDelta() {
	final DeltaCache.Ref taken = cachedDelta;
	cachedDelta = null;
	return taken;
}
/**
 * Forget the delta base and drop any cached delta.
 * <p>
 * A held cached delta reference is cleared and enqueued before being
 * released.
 */
void clearDeltaBase() {
	this.deltaBase = null;

	final DeltaCache.Ref stale = cachedDelta;
	if (stale == null)
		return;
	stale.clear();
	stale.enqueue();
	cachedDelta = null;
}
/**
@ -248,6 +268,14 @@ void setPathHash(int hc) {
pathHash = hc;
}
/**
 * @return the value last stored by {@link #setCachedSize(int)}. The value
 *         lives in the {@code pathHash} field, so it is only meaningful
 *         once the path hash is no longer needed.
 */
int getCachedSize() {
	return this.pathHash;
}
/**
 * Record the size of the cached delta.
 *
 * @param sz
 *            size to remember. It overwrites the {@code pathHash} field,
 *            so the path hash is lost after this call.
 */
void setCachedSize(int sz) {
	this.pathHash = sz;
}
/**
* Remember a specific representation for reuse at a later time.
* <p>

View File

@ -61,6 +61,10 @@ public PackConfig parse(final Config cfg) {
final int deltaDepth;
final long deltaCacheSize;
final int deltaCacheLimit;
final int compression;
final int indexVersion;
@ -70,6 +74,8 @@ public PackConfig parse(final Config cfg) {
private PackConfig(Config rc) {
deltaWindow = rc.getInt("pack", "window", PackWriter.DEFAULT_DELTA_SEARCH_WINDOW_SIZE);
deltaWindowMemory = rc.getLong("pack", null, "windowmemory", 0);
deltaCacheSize = rc.getLong("pack", null, "deltacachesize", PackWriter.DEFAULT_DELTA_CACHE_SIZE);
deltaCacheLimit = rc.getInt("pack", "deltacachelimit", PackWriter.DEFAULT_DELTA_CACHE_LIMIT);
deltaDepth = rc.getInt("pack", "depth", PackWriter.DEFAULT_MAX_DELTA_DEPTH);
compression = compression(rc);
indexVersion = rc.getInt("pack", "indexversion", 2);

View File

@ -179,6 +179,10 @@ public class PackWriter {
static final long DEFAULT_BIG_FILE_THRESHOLD = 50 * 1024 * 1024;
static final long DEFAULT_DELTA_CACHE_SIZE = 50 * 1024 * 1024;
static final int DEFAULT_DELTA_CACHE_LIMIT = 100;
private static final int PACK_VERSION_GENERATED = 2;
@SuppressWarnings("unchecked")
@ -221,6 +225,10 @@ public class PackWriter {
private int deltaSearchWindowSize = DEFAULT_DELTA_SEARCH_WINDOW_SIZE;
private long deltaCacheSize = DEFAULT_DELTA_CACHE_SIZE;
private int deltaCacheLimit = DEFAULT_DELTA_CACHE_LIMIT;
private int indexVersion;
private long bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
@ -275,6 +283,8 @@ public PackWriter(final Repository repo, final ObjectReader reader) {
final PackConfig pc = configOf(repo).get(PackConfig.KEY);
deltaSearchWindowSize = pc.deltaWindow;
deltaCacheSize = pc.deltaCacheSize;
deltaCacheLimit = pc.deltaCacheLimit;
maxDeltaDepth = pc.deltaDepth;
compressionLevel = pc.compression;
indexVersion = pc.indexVersion;
@ -465,6 +475,57 @@ public void setDeltaSearchWindowSize(int objectCount) {
deltaSearchWindowSize = objectCount;
}
/**
 * Get the byte budget of the in-memory delta cache.
 *
 * @return upper bound, in bytes, on delta data kept in memory. 0 means
 *         the cache is unbounded (other than by the JVM heap). A very
 *         tiny value such as 1 effectively turns the cache off.
 */
public long getDeltaCacheSize() {
	return this.deltaCacheSize;
}
/**
 * Set the byte budget of the in-memory delta cache.
 * <p>
 * While searching for deltas, small or hard to compute deltas are kept in
 * memory up to this many bytes. Writing is faster for cached entries
 * because they can be copied straight to the output stream.
 *
 * @param size
 *            number of bytes to cache. Use 0 for an unbounded cache, or
 *            1 (an impossible size for any delta) to disable the cache.
 */
public void setDeltaCacheSize(long size) {
	this.deltaCacheSize = size;
}
/**
 * Get the per-delta size limit for caching.
 *
 * @return maximum size (in bytes) of a delta that should be cached.
 */
public int getDeltaCacheLimit() {
	return this.deltaCacheLimit;
}
/**
 * Set the per-delta size limit for caching.
 * <p>
 * While searching for deltas, every delta smaller than this size is
 * cached, as long as the {@link #getDeltaCacheSize()} budget allows.
 * Cached deltas can be written out as-is, which speeds up writing.
 *
 * @param size
 *            maximum size (in bytes) of a delta to be cached.
 */
public void setDeltaCacheLimit(int size) {
	this.deltaCacheLimit = size;
}
/**
* Get the maximum file size that will be delta compressed.
* <p>
@ -488,6 +549,27 @@ public void setBigFileThreshold(long bigFileThreshold) {
this.bigFileThreshold = bigFileThreshold;
}
/**
 * Get the compression level used for objects in the pack.
 *
 * @return current compression level, see {@link java.util.zip.Deflater}.
 */
public int getCompressionLevel() {
	return this.compressionLevel;
}
/**
 * Set the compression level used for objects in the pack.
 *
 * @param level
 *            compression level; must be one the
 *            {@link java.util.zip.Deflater} class recognizes. Typically
 *            this setting is {@link java.util.zip.Deflater#BEST_SPEED}.
 */
public void setCompressionLevel(int level) {
	this.compressionLevel = level;
}
/** @return true if this writer is producing a thin pack. */
public boolean isThin() {
return thin;
@ -846,7 +928,8 @@ private boolean loadSize(ObjectToPack e) throws MissingObjectException,
private void searchForDeltas(ProgressMonitor monitor,
ObjectToPack[] list, int cnt) throws MissingObjectException,
IncorrectObjectTypeException, LargeObjectException, IOException {
DeltaWindow dw = new DeltaWindow(this, reader);
DeltaCache dc = new DeltaCache(this);
DeltaWindow dw = new DeltaWindow(this, dc, reader);
dw.search(monitor, list, 0, cnt);
}
@ -955,6 +1038,16 @@ private void writeWholeObjectDeflate(PackOutputStream out,
private void writeDeltaObjectDeflate(PackOutputStream out,
final ObjectToPack otp) throws IOException {
DeltaCache.Ref ref = otp.popCachedDelta();
if (ref != null) {
byte[] zbuf = ref.get();
if (zbuf != null) {
out.writeHeader(otp, otp.getCachedSize());
out.write(zbuf);
return;
}
}
TemporaryBuffer.Heap delta = delta(otp);
out.writeHeader(otp, delta.length());