diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java new file mode 100644 index 000000000..4ae4eb8fa --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.storage.pack; + +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.util.TemporaryBuffer; + +class DeltaWindow { + private static final int NEXT_RES = 0; + + private static final int NEXT_SRC = 1; + + private final PackWriter writer; + + private final ObjectReader reader; + + private final DeltaWindowEntry[] window; + + /** Maximum depth we should create for any delta chain. */ + private final int maxDepth; + + // The object we are currently considering needs a lot of state: + + /** Position of {@link #res} within {@link #window} array. */ + private int resSlot; + + /** + * Maximum delta chain depth the current object can have. + *

+ * This can be smaller than {@link #maxDepth}. + */ + private int resMaxDepth; + + /** Window entry of the object we are currently considering. */ + private DeltaWindowEntry res; + + /** If we have a delta for {@link #res}, this is the shortest found yet. */ + private TemporaryBuffer.Heap bestDelta; + + /** If we have {@link #bestDelta}, the window position it was created by. */ + private int bestSlot; + + DeltaWindow(PackWriter pw, ObjectReader or) { + writer = pw; + reader = or; + + // C Git increases the window size supplied by the user by 1. + // We don't know why it does this, but if the user asks for + // window=10, it actually processes with window=11. Because + // the window size has the largest direct impact on the final + // pack file size, we match this odd behavior here to give us + // a better chance of producing a similar sized pack as C Git. + // + // We would prefer to directly honor the user's request since + // PackWriter has a minimum of 2 for the window size, but then + // users might complain that JGit is creating a bigger pack file. + // + window = new DeltaWindowEntry[pw.getDeltaSearchWindowSize() + 1]; + for (int i = 0; i < window.length; i++) + window[i] = new DeltaWindowEntry(); + + maxDepth = pw.getMaxDeltaDepth(); + } + + void search(ProgressMonitor monitor, ObjectToPack[] toSearch, int off, + int cnt) throws IOException { + for (int end = off + cnt; off < end; off++) { + monitor.update(1); + + res = window[resSlot]; + res.set(toSearch[off]); + + if (res.object.isDoNotDelta()) { + // PackWriter marked edge objects with the do-not-delta flag. + // They are the only ones that appear in toSearch with it set, + // but we don't actually want to make a delta for them, just + // need to push them into the window so they can be read by + // other objects coming through. + // + keepInWindow(); + } else { + // Search for a delta for the current window slot. 
+ // + search(); + } + } + } + + private void search() throws IOException { + // TODO(spearce) If the object is used as a base for other + // objects in this pack we should limit the depth we create + // for ourselves to be the remainder of our longest dependent + // chain and the configured maximum depth. This can happen + // when the dependents are being reused out a pack, but we + // cannot be because we are near the edge of a thin pack. + // + resMaxDepth = maxDepth; + + // Loop through the window backwards, considering every entry. + // This lets us look at the bigger objects that came before. + // + for (int srcSlot = prior(resSlot); srcSlot != resSlot; srcSlot = prior(srcSlot)) { + DeltaWindowEntry src = window[srcSlot]; + if (src.empty()) + break; + if (delta(src, srcSlot) == NEXT_RES) { + bestDelta = null; + return; + } + } + + // We couldn't find a suitable delta for this object, but it may + // still be able to act as a base for another one. + // + if (bestDelta == null) { + keepInWindow(); + return; + } + + // Select this best matching delta as the base for the object. + // + ObjectToPack srcObj = window[bestSlot].object; + ObjectToPack resObj = res.object; + if (srcObj.isDoNotDelta()) { + // The source (the delta base) is an edge object outside of the + // pack. Its part of the common base set that the peer already + // has on hand, so we don't want to send it. We have to store + // an ObjectId and *NOT* an ObjectToPack for the base to ensure + // the base isn't included in the outgoing pack file. + // + resObj.setDeltaBase(srcObj.copy()); + } else { + // The base is part of the pack we are sending, so it should be + // a direct pointer to the base. + // + resObj.setDeltaBase(srcObj); + } + resObj.setDeltaDepth(srcObj.getDeltaDepth() + 1); + resObj.clearReuseAsIs(); + + // Discard the cached best result, otherwise it leaks. + // + bestDelta = null; + + // If this should be the end of a chain, don't keep + // it in the window. 
Just move on to the next object. + // + if (resObj.getDeltaDepth() == maxDepth) + return; + + shuffleBaseUpInPriority(); + keepInWindow(); + } + + private int delta(final DeltaWindowEntry src, final int srcSlot) + throws IOException { + // Objects must use only the same type as their delta base. + // If we are looking at something where that isn't true we + // have exhausted everything of the correct type and should + // move on to the next thing to examine. + // + if (src.type() != res.type()) { + keepInWindow(); + return NEXT_RES; + } + + // Only consider a source with a short enough delta chain. + if (src.depth() > resMaxDepth) + return NEXT_SRC; + + // Estimate a reasonable upper limit on delta size. + int msz = deltaSizeLimit(res, resMaxDepth, src); + if (msz <= 8) + return NEXT_SRC; + + // If we have to insert a lot to make this work, find another. + if (res.size() - src.size() > msz) + return NEXT_SRC; + + // If the sizes are radically different, this is a bad pairing. + if (res.size() < src.size() / 16) + return NEXT_SRC; + + DeltaIndex srcIndex; + try { + srcIndex = index(src); + } catch (LargeObjectException tooBig) { + // If the source is too big to work on, skip it. + dropFromWindow(srcSlot); + return NEXT_SRC; + } catch (IOException notAvailable) { + if (src.object.isDoNotDelta()) { + // This is an edge that is suddenly not available. + dropFromWindow(srcSlot); + return NEXT_SRC; + } else { + throw notAvailable; + } + } + + byte[] resBuf; + try { + resBuf = buffer(res); + } catch (LargeObjectException tooBig) { + // If its too big, move on to another item. + return NEXT_RES; + } + + // If we already have a delta for the current object, abort + // encoding early if this new pairing produces a larger delta. 
+ if (bestDelta != null && bestDelta.length() < msz) + msz = (int) bestDelta.length(); + + TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(msz); + try { + if (!srcIndex.encode(delta, resBuf, msz)) + return NEXT_SRC; + } catch (IOException deltaTooBig) { + // This only happens when the heap overflows our limit. + return NEXT_SRC; + } + + if (isBetterDelta(src, delta)) { + bestDelta = delta; + bestSlot = srcSlot; + } + + return NEXT_SRC; + } + + private void shuffleBaseUpInPriority() { + // Shuffle the entire window so that the best match we just used + // is at our current index, and our current object is at the index + // before it. Slide any entries in between to make space. + // + window[resSlot] = window[bestSlot]; + + DeltaWindowEntry next = res; + int slot = prior(resSlot); + for (; slot != bestSlot; slot = prior(slot)) { + DeltaWindowEntry e = window[slot]; + window[slot] = next; + next = e; + } + window[slot] = next; + } + + private void keepInWindow() { + if (++resSlot == window.length) + resSlot = 0; + } + + private int prior(int slot) { + if (slot == 0) + return window.length - 1; + return slot - 1; + } + + private void dropFromWindow(@SuppressWarnings("unused") int srcSlot) { + // We should drop the current source entry from the window, + // it is somehow invalid for us to work with. + } + + private boolean isBetterDelta(DeltaWindowEntry src, + TemporaryBuffer.Heap resDelta) { + if (bestDelta == null) + return true; + + // If both delta sequences are the same length, use the one + // that has a shorter delta chain since it would be faster + // to access during reads. 
+ // + if (resDelta.length() == bestDelta.length()) + return src.depth() < window[bestSlot].depth(); + + return resDelta.length() < bestDelta.length(); + } + + private static int deltaSizeLimit(DeltaWindowEntry res, int maxDepth, + DeltaWindowEntry src) { + // Ideally the delta is at least 50% of the original size, + // but we also want to account for delta header overhead in + // the pack file (to point to the delta base) so subtract off + // some of those header bytes from the limit. + // + final int limit = res.size() / 2 - 20; + + // Distribute the delta limit over the entire chain length. + // This is weighted such that deeper items in the chain must + // be even smaller than if they were earlier in the chain, as + // they cost significantly more to unpack due to the increased + // number of recursive unpack calls. + // + final int remainingDepth = maxDepth - src.depth(); + return (limit * remainingDepth) / maxDepth; + } + + private DeltaIndex index(DeltaWindowEntry ent) + throws MissingObjectException, IncorrectObjectTypeException, + IOException, LargeObjectException { + DeltaIndex idx = ent.index; + if (idx == null) { + try { + idx = new DeltaIndex(buffer(ent)); + } catch (OutOfMemoryError noMemory) { + LargeObjectException e = new LargeObjectException(ent.object); + e.initCause(noMemory); + throw e; + } + ent.index = idx; + } + return idx; + } + + private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException, + IncorrectObjectTypeException, IOException, LargeObjectException { + byte[] buf = ent.buffer; + if (buf == null) + ent.buffer = buf = writer.buffer(reader, ent.object); + return buf; + } +} \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindowEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindowEntry.java new file mode 100644 index 000000000..0f1e6329f --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindowEntry.java @@ -0,0 +1,80 @@ +/* + * 
Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.storage.pack; + +class DeltaWindowEntry { + ObjectToPack object; + + /** Complete contents of this object. Lazily loaded. */ + byte[] buffer; + + /** Index of this object's content, to encode other deltas. Lazily loaded. */ + DeltaIndex index; + + void set(ObjectToPack object) { + this.object = object; + this.index = null; + this.buffer = null; + } + + /** @return current delta chain depth of this object. */ + int depth() { + return object.getDeltaDepth(); + } + + /** @return type of the object in this window entry. */ + int type() { + return object.getType(); + } + + /** @return estimated unpacked size of the object, in bytes . */ + int size() { + return object.getWeight(); + } + + /** @return true if there is no object stored in this entry. */ + boolean empty() { + return object == null; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index d96d5ddfd..f88f2635e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -48,11 +48,14 @@ import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.security.MessageDigest; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.zip.Deflater; @@ -61,6 +64,7 @@ import org.eclipse.jgit.JGitText; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import 
org.eclipse.jgit.lib.AnyObjectId; @@ -78,6 +82,8 @@ import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevSort; import org.eclipse.jgit.storage.file.PackIndexWriter; +import org.eclipse.jgit.util.IO; +import org.eclipse.jgit.util.TemporaryBuffer; /** * <p>
@@ -716,6 +722,8 @@ public void writePack(ProgressMonitor compressMonitor, if ((reuseDeltas || reuseObjects) && reuseSupport != null) searchForReuse(); + if (deltaCompress) + searchForDeltas(compressMonitor); final PackOutputStream out = new PackOutputStream(writeMonitor, packStream, this); @@ -745,6 +753,103 @@ private void searchForReuse() throws IOException { } } + private void searchForDeltas(ProgressMonitor monitor) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + // Commits and annotated tags tend to have too many differences to + // really benefit from delta compression. Consequently just don't + // bother examining those types here. + // + ObjectToPack[] list = new ObjectToPack[ + objectsLists[Constants.OBJ_TREE].size() + + objectsLists[Constants.OBJ_BLOB].size() + + edgeObjects.size()]; + int cnt = 0; + cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_TREE); + cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_BLOB); + if (cnt == 0) + return; + + // Queue up any edge objects that we might delta against. We won't + // be sending these as we assume the other side has them, but we need + // them in the search phase below. + // + for (ObjectToPack eo : edgeObjects) { + try { + if (loadSize(eo)) + list[cnt++] = eo; + } catch (IOException notAvailable) { + // Skip this object. Since we aren't going to write it out + // the only consequence of it being unavailable to us is we + // may produce a larger data stream than we could have. + // + if (!ignoreMissingUninteresting) + throw notAvailable; + } + } + + monitor.beginTask(COMPRESSING_OBJECTS_PROGRESS, cnt); + + // Sort the objects by path hash so like files are near each other, + // and then by size descending so that bigger files are first. This + // applies "Linus' Law" which states that newer files tend to be the + // bigger ones, because source files grow and hardly ever shrink. 
+ // + Arrays.sort(list, 0, cnt, new Comparator() { + public int compare(ObjectToPack a, ObjectToPack b) { + int cmp = a.getType() - b.getType(); + if (cmp == 0) + cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1); + if (cmp == 0) + cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1); + if (cmp == 0) + cmp = b.getWeight() - a.getWeight(); + return cmp; + } + }); + searchForDeltas(monitor, list, cnt); + monitor.endTask(); + } + + private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + for (ObjectToPack otp : objectsLists[type]) { + if (otp.isDoNotDelta()) // delta is disabled for this path + continue; + if (otp.isDeltaRepresentation()) // already reusing a delta + continue; + if (loadSize(otp)) + list[cnt++] = otp; + } + return cnt; + } + + private boolean loadSize(ObjectToPack e) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + long sz = reader.getObjectSize(e, e.getType()); + + // If its too big for us to handle, skip over it. + // + if (bigFileThreshold <= sz || Integer.MAX_VALUE <= sz) + return false; + + // If its too tiny for the delta compression to work, skip it. + // + if (sz <= DeltaIndex.BLKSZ) + return false; + + e.setWeight((int) sz); + return true; + } + + private void searchForDeltas(ProgressMonitor monitor, + ObjectToPack[] list, int cnt) throws MissingObjectException, + IncorrectObjectTypeException, LargeObjectException, IOException { + DeltaWindow dw = new DeltaWindow(this, reader); + dw.search(monitor, list, 0, cnt); + } + private void writeObjects(ProgressMonitor writeMonitor, PackOutputStream out) throws IOException { for (List list : objectsLists) { @@ -793,7 +898,10 @@ private void writeObject(PackOutputStream out, final ObjectToPack otp) // If we reached here, reuse wasn't possible. 
// - writeWholeObjectDeflate(out, otp); + if (otp.isDeltaRepresentation()) + writeDeltaObjectDeflate(out, otp); + else + writeWholeObjectDeflate(out, otp); out.endObject(); otp.setCRC(out.getCRC32()); } @@ -845,6 +953,70 @@ private void writeWholeObjectDeflate(PackOutputStream out, dst.finish(); } + private void writeDeltaObjectDeflate(PackOutputStream out, + final ObjectToPack otp) throws IOException { + TemporaryBuffer.Heap delta = delta(otp); + out.writeHeader(otp, delta.length()); + + Deflater deflater = deflater(); + deflater.reset(); + DeflaterOutputStream dst = new DeflaterOutputStream(out, deflater); + delta.writeTo(dst, null); + dst.finish(); + } + + private TemporaryBuffer.Heap delta(final ObjectToPack otp) + throws IOException { + DeltaIndex index = new DeltaIndex(buffer(reader, otp.getDeltaBaseId())); + byte[] res = buffer(reader, otp); + + // We never would have proposed this pair if the delta would be + // larger than the unpacked version of the object. So using it + // as our buffer limit is valid: we will never reach it. + // + TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(res.length); + index.encode(delta, res); + return delta; + } + + byte[] buffer(ObjectReader or, AnyObjectId objId) throws IOException { + ObjectLoader ldr = or.open(objId); + if (!ldr.isLarge()) + return ldr.getCachedBytes(); + + // PackWriter should have already pruned objects that + // are above the big file threshold, so our chances of + // the object being below it are very good. We really + // shouldn't be here, unless the implementation is odd. + + // If it really is too big to work with, abort out now. + // + long sz = ldr.getSize(); + if (getBigFileThreshold() <= sz || Integer.MAX_VALUE < sz) + throw new LargeObjectException(objId.copy()); + + // Its considered to be large by the loader, but we really + // want it in byte array format. Try to make it happen. 
+ // + byte[] buf; + try { + buf = new byte[(int) sz]; + } catch (OutOfMemoryError noMemory) { + LargeObjectException e; + + e = new LargeObjectException(objId.copy()); + e.initCause(noMemory); + throw e; + } + InputStream in = ldr.openStream(); + try { + IO.readFully(in, buf, 0, buf.length); + } finally { + in.close(); + } + return buf; + } + private Deflater deflater() { if (myDeflater == null) myDeflater = new Deflater(compressionLevel);