From ece88b99eb2ea6541b667aa066573184c25b6a8b Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 26 Jun 2010 14:46:05 -0700 Subject: [PATCH] Redo PackWriter object reuse output Output of selected reuses is refactored to use a new ObjectReuseAsIs interface that extends the ObjectReader. This interface allows the reader to control how it performs the reuse into the output stream, but also allows it to throw an exception to request the writer to find a different candidate representation. The PackFile reuse code was overhauled, cleaning up the APIs so they aren't exposed in the object loader, but instead are now a single method on the PackFile itself. The reuse algorithm was changed to do a data verification pass, followed by the copy pass to the output. This permits us to work around a corrupt object in a pack file by seeking another copy of that object when this one is bad. The reuse code was also optimized for the common case, where the in-pack representation is under 16 KiB. In these smaller cases data is sent to the pack writer more directly, avoiding some copying. Change-Id: I6350c2b444118305e8446ce1dfd049259832bcca Signed-off-by: Shawn O. 
Pearce --- ...ctRepresentationNotAvailableException.java | 61 ++++ .../org/eclipse/jgit/lib/ByteArrayWindow.java | 14 - .../eclipse/jgit/lib/ByteBufferWindow.java | 18 -- .../src/org/eclipse/jgit/lib/ByteWindow.java | 10 - .../eclipse/jgit/lib/LocalObjectToPack.java | 10 +- .../org/eclipse/jgit/lib/ObjectReuseAsIs.java | 37 +++ .../src/org/eclipse/jgit/lib/PackFile.java | 269 ++++++++++++++---- .../eclipse/jgit/lib/PackOutputStream.java | 90 +++++- .../src/org/eclipse/jgit/lib/PackWriter.java | 205 ++++++------- .../eclipse/jgit/lib/PackedObjectLoader.java | 64 ----- .../org/eclipse/jgit/lib/WindowCursor.java | 18 +- .../src/org/eclipse/jgit/util/LongList.java | 14 + 12 files changed, 514 insertions(+), 296 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/errors/StoredObjectRepresentationNotAvailableException.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/errors/StoredObjectRepresentationNotAvailableException.java b/org.eclipse.jgit/src/org/eclipse/jgit/errors/StoredObjectRepresentationNotAvailableException.java new file mode 100644 index 000000000..cff449927 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/errors/StoredObjectRepresentationNotAvailableException.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.errors; + +import org.eclipse.jgit.lib.ObjectToPack; + +/** A previously selected representation is no longer available. */ +public class StoredObjectRepresentationNotAvailableException extends Exception { + private static final long serialVersionUID = 1L; + + /** + * Construct an error for an object. + * + * @param otp + * the object whose current representation is no longer present. + */ + public StoredObjectRepresentationNotAvailableException(ObjectToPack otp) { + // Do nothing. 
+ } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java index 804261031..4f2373d3d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java @@ -80,18 +80,4 @@ protected int inflate(final int pos, final byte[] b, int o, o += inf.inflate(b, o, b.length - o); return o; } - - @Override - protected void inflateVerify(final int pos, final Inflater inf) - throws DataFormatException { - while (!inf.finished()) { - if (inf.needsInput()) { - inf.setInput(array, pos, array.length - pos); - break; - } - inf.inflate(verifyGarbageBuffer, 0, verifyGarbageBuffer.length); - } - while (!inf.finished() && !inf.needsInput()) - inf.inflate(verifyGarbageBuffer, 0, verifyGarbageBuffer.length); - } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteBufferWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteBufferWindow.java index 1b29934d2..794d7428e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteBufferWindow.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteBufferWindow.java @@ -89,22 +89,4 @@ protected int inflate(final int pos, final byte[] b, int o, o += inf.inflate(b, o, b.length - o); return o; } - - @Override - protected void inflateVerify(final int pos, final Inflater inf) - throws DataFormatException { - final byte[] tmp = new byte[512]; - final ByteBuffer s = buffer.slice(); - s.position(pos); - while (s.remaining() > 0 && !inf.finished()) { - if (inf.needsInput()) { - final int n = Math.min(s.remaining(), tmp.length); - s.get(tmp, 0, n); - inf.setInput(tmp, 0, n); - } - inf.inflate(verifyGarbageBuffer, 0, verifyGarbageBuffer.length); - } - while (!inf.finished() && !inf.needsInput()) - inf.inflate(verifyGarbageBuffer, 0, verifyGarbageBuffer.length); - } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteWindow.java 
b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteWindow.java index cbef4218a..69d255c78 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteWindow.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteWindow.java @@ -172,14 +172,4 @@ final int inflate(long pos, byte[] dstbuf, int dstoff, Inflater inf) */ protected abstract int inflate(int pos, byte[] dstbuf, int dstoff, Inflater inf) throws DataFormatException; - - protected static final byte[] verifyGarbageBuffer = new byte[2048]; - - final void inflateVerify(final long pos, final Inflater inf) - throws DataFormatException { - inflateVerify((int) (pos - start), inf); - } - - protected abstract void inflateVerify(int pos, Inflater inf) - throws DataFormatException; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/LocalObjectToPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/LocalObjectToPack.java index 516cf631c..8db58707e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/LocalObjectToPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/LocalObjectToPack.java @@ -43,26 +43,20 @@ package org.eclipse.jgit.lib; -import java.io.IOException; - import org.eclipse.jgit.revwalk.RevObject; /** {@link ObjectToPack} for {@link ObjectDirectory}. */ class LocalObjectToPack extends ObjectToPack { /** Pack to reuse compressed data from, otherwise null. */ - private PackFile copyFromPack; + PackFile copyFromPack; /** Offset of the object's header in {@link #copyFromPack}. 
*/ - private long copyOffset; + long copyOffset; LocalObjectToPack(RevObject obj) { super(obj); } - PackedObjectLoader getCopyLoader(WindowCursor curs) throws IOException { - return copyFromPack.resolveBase(curs, copyOffset); - } - @Override public void select(StoredObjectRepresentation ref) { LocalObjectRepresentation ptr = (LocalObjectRepresentation)ref; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReuseAsIs.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReuseAsIs.java index f7aebf124..f87b8301c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReuseAsIs.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReuseAsIs.java @@ -46,6 +46,7 @@ import java.io.IOException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.revwalk.RevObject; /** @@ -95,4 +96,40 @@ public interface ObjectReuseAsIs { */ public void selectObjectRepresentation(PackWriter packer, ObjectToPack otp) throws IOException, MissingObjectException; + + /** + * Output a previously selected representation. + *

+ * {@code PackWriter} invokes this method only if a representation + * previously given to it by {@code selectObjectRepresentation} was chosen + * for reuse into the output stream. The {@code otp} argument is an instance + * created by this reader's own {@code newObjectToPack}, and the + * representation data saved within it also originated from this reader. + *

+ * Implementors must write the object header before copying the raw data to + * the output stream. The typical implementation is like: + * + *

+	 * MyToPack mtp = (MyToPack) otp;
+	 * byte[] raw = validate(mtp); // throw SORNAE here, if at all
+	 * out.writeHeader(mtp, mtp.inflatedSize);
+	 * out.write(raw);
+	 * 
+ * + * @param out + * stream the object should be written to. + * @param otp + * the object's saved representation information. + * @throws StoredObjectRepresentationNotAvailableException + * the previously selected representation is no longer + * available. If thrown before {@code out.writeHeader} the pack + * writer will try to find another representation, and write + * that one instead. If thrown after {@code out.writeHeader}, + * packing will abort. + * @throws IOException + * the stream's write method threw an exception. Packing will + * abort. + */ + public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp) + throws IOException, StoredObjectRepresentationNotAvailableException; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java index 829832e6a..25835e2ff 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java @@ -48,7 +48,6 @@ import java.io.EOFException; import java.io.File; import java.io.IOException; -import java.io.OutputStream; import java.io.RandomAccessFile; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel.MapMode; @@ -58,13 +57,15 @@ import java.util.Comparator; import java.util.Iterator; import java.util.zip.CRC32; -import java.util.zip.CheckedOutputStream; import java.util.zip.DataFormatException; +import java.util.zip.Inflater; import org.eclipse.jgit.JGitText; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.PackInvalidException; import org.eclipse.jgit.errors.PackMismatchException; +import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; +import org.eclipse.jgit.util.LongList; import org.eclipse.jgit.util.NB; import org.eclipse.jgit.util.RawParseUtils; @@ -108,6 +109,15 @@ public int compare(final PackFile a, final PackFile b) { private PackReverseIndex reverseIdx; + /** + * Objects we have tried to read, 
and discovered to be corrupt. + *

+ * The list is allocated after the first corruption is found, and filled in + * as more entries are discovered. Typically this list is never used, as + * pack files do not usually contain corrupt objects. + */ + private volatile LongList corruptObjects; + /** * Construct a reader for an existing, pre-indexed packfile. * @@ -152,6 +162,10 @@ else if (!Arrays.equals(packChecksum, idx.packChecksum)) final PackedObjectLoader resolveBase(final WindowCursor curs, final long ofs) throws IOException { + if (isCorrupt(ofs)) { + throw new CorruptObjectException(MessageFormat.format(JGitText + .get().objectAtHasBadZlibStream, ofs, getPackFile())); + } return reader(curs, ofs); } @@ -174,7 +188,8 @@ public File getPackFile() { * the index file cannot be loaded into memory. */ public boolean hasObject(final AnyObjectId id) throws IOException { - return idx().hasObject(id); + final long offset = idx().findOffset(id); + return 0 < offset && !isCorrupt(offset); } /** @@ -192,7 +207,7 @@ public boolean hasObject(final AnyObjectId id) throws IOException { public PackedObjectLoader get(final WindowCursor curs, final AnyObjectId id) throws IOException { final long offset = idx().findOffset(id); - return 0 < offset ? reader(curs, offset) : null; + return 0 < offset && !isCorrupt(offset) ? 
reader(curs, offset) : null; } /** @@ -269,48 +284,163 @@ final byte[] decompress(final long position, final int totalSize, return dstbuf; } - final void copyRawData(final PackedObjectLoader loader, - final OutputStream out, final byte buf[], final WindowCursor curs) - throws IOException { - final long objectOffset = loader.objectOffset; - final long dataOffset = objectOffset + loader.headerSize; - final long sz = findEndOffset(objectOffset) - dataOffset; - final PackIndex idx = idx(); - - if (idx.hasCRC32Support()) { - final CRC32 crc = new CRC32(); - int headerCnt = loader.headerSize; - while (headerCnt > 0) { - final int toRead = Math.min(headerCnt, buf.length); - readFully(objectOffset, buf, 0, toRead, curs); - crc.update(buf, 0, toRead); - headerCnt -= toRead; - } - final CheckedOutputStream crcOut = new CheckedOutputStream(out, crc); - copyToStream(dataOffset, buf, sz, crcOut, curs); - final long computed = crc.getValue(); - - final ObjectId id = findObjectForOffset(objectOffset); - final long expected = idx.findCRC32(id); - if (computed != expected) - throw new CorruptObjectException(MessageFormat.format( - JGitText.get().objectAtHasBadZlibStream, objectOffset, getPackFile())); - } else { - try { - curs.inflateVerify(this, dataOffset); - } catch (DataFormatException dfe) { - final CorruptObjectException coe; - coe = new CorruptObjectException(MessageFormat.format( - JGitText.get().objectAtHasBadZlibStream, objectOffset, getPackFile())); - coe.initCause(dfe); - throw coe; - } - copyToStream(dataOffset, buf, sz, out, curs); + final void copyAsIs(PackOutputStream out, LocalObjectToPack src, + WindowCursor curs) throws IOException, + StoredObjectRepresentationNotAvailableException { + beginCopyAsIs(src); + try { + copyAsIs2(out, src, curs); + } finally { + endCopyAsIs(); } } - boolean supportsFastCopyRawData() throws IOException { - return idx().hasCRC32Support(); + private void copyAsIs2(PackOutputStream out, LocalObjectToPack src, + WindowCursor curs) throws 
IOException, + StoredObjectRepresentationNotAvailableException { + final CRC32 crc1 = new CRC32(); + final CRC32 crc2 = new CRC32(); + final byte[] buf = out.getCopyBuffer(); + + // Rip apart the header so we can discover the size. + // + readFully(src.copyOffset, buf, 0, 20, curs); + int c = buf[0] & 0xff; + final int typeCode = (c >> 4) & 7; + long inflatedLength = c & 15; + int shift = 4; + int headerCnt = 1; + while ((c & 0x80) != 0) { + c = buf[headerCnt++] & 0xff; + inflatedLength += (c & 0x7f) << shift; + shift += 7; + } + + if (typeCode == Constants.OBJ_OFS_DELTA) { + do { + c = buf[headerCnt++] & 0xff; + } while ((c & 128) != 0); + crc1.update(buf, 0, headerCnt); + crc2.update(buf, 0, headerCnt); + } else if (typeCode == Constants.OBJ_REF_DELTA) { + crc1.update(buf, 0, headerCnt); + crc2.update(buf, 0, headerCnt); + + readFully(src.copyOffset + headerCnt, buf, 0, 20, curs); + crc1.update(buf, 0, 20); + crc2.update(buf, 0, headerCnt); + headerCnt += 20; + } else { + crc1.update(buf, 0, headerCnt); + crc2.update(buf, 0, headerCnt); + } + + final long dataOffset = src.copyOffset + headerCnt; + final long dataLength; + final long expectedCRC; + + // Verify the object isn't corrupt before sending. If it is, + // we report it missing instead. + // + try { + dataLength = findEndOffset(src.copyOffset) - dataOffset; + + if (idx().hasCRC32Support()) { + // Index has the CRC32 code cached, validate the object. 
+ // + expectedCRC = idx().findCRC32(src); + + long pos = dataOffset; + long cnt = dataLength; + while (cnt > 0) { + final int n = (int) Math.min(cnt, buf.length); + readFully(pos, buf, 0, n, curs); + crc1.update(buf, 0, n); + pos += n; + cnt -= n; + } + if (crc1.getValue() != expectedCRC) { + setCorrupt(src.copyOffset); + throw new CorruptObjectException(MessageFormat.format( + JGitText.get().objectAtHasBadZlibStream, + src.copyOffset, getPackFile())); + } + } else { + // We don't have a CRC32 code in the index, so compute it + // now while inflating the raw data to get zlib to tell us + // whether or not the data is safe. + // + long pos = dataOffset; + long cnt = dataLength; + Inflater inf = curs.inflater(); + byte[] tmp = new byte[1024]; + while (cnt > 0) { + final int n = (int) Math.min(cnt, buf.length); + readFully(pos, buf, 0, n, curs); + crc1.update(buf, 0, n); + inf.setInput(buf, 0, n); + while (inf.inflate(tmp, 0, tmp.length) > 0) + continue; + pos += n; + cnt -= n; + } + if (!inf.finished()) { + setCorrupt(src.copyOffset); + throw new EOFException(MessageFormat.format( + JGitText.get().shortCompressedStreamAt, + src.copyOffset)); + } + expectedCRC = crc1.getValue(); + } + } catch (DataFormatException dataFormat) { + setCorrupt(src.copyOffset); + + CorruptObjectException corruptObject = new CorruptObjectException( + MessageFormat.format( + JGitText.get().objectAtHasBadZlibStream, + src.copyOffset, getPackFile())); + corruptObject.initCause(dataFormat); + + StoredObjectRepresentationNotAvailableException gone; + gone = new StoredObjectRepresentationNotAvailableException(src); + gone.initCause(corruptObject); + throw gone; + + } catch (IOException ioError) { + StoredObjectRepresentationNotAvailableException gone; + gone = new StoredObjectRepresentationNotAvailableException(src); + gone.initCause(ioError); + throw gone; + } + + if (dataLength <= buf.length) { + // Tiny optimization: Lots of objects are very small deltas or + // deflated commits that are 
likely to fit in the copy buffer. + // + out.writeHeader(src, inflatedLength); + out.write(buf, 0, (int) dataLength); + } else { + // Now we are committed to sending the object. As we spool it out, + // check its CRC32 code to make sure there wasn't corruption between + // the verification we did above, and us actually outputting it. + // + out.writeHeader(src, inflatedLength); + long pos = dataOffset; + long cnt = dataLength; + while (cnt > 0) { + final int n = (int) Math.min(cnt, buf.length); + readFully(pos, buf, 0, n, curs); + crc2.update(buf, 0, n); + out.write(buf, 0, n); + pos += n; + cnt -= n; + } + if (crc2.getValue() != expectedCRC) { + throw new CorruptObjectException(MessageFormat.format(JGitText + .get().objectAtHasBadZlibStream, src.copyOffset, + getPackFile())); + } + } } boolean invalid() { @@ -324,24 +454,22 @@ private void readFully(final long position, final byte[] dstbuf, throw new EOFException(); } - private void copyToStream(long position, final byte[] buf, long cnt, - final OutputStream out, final WindowCursor curs) - throws IOException, EOFException { - while (cnt > 0) { - final int toRead = (int) Math.min(cnt, buf.length); - readFully(position, buf, 0, toRead, curs); - position += toRead; - cnt -= toRead; - out.write(buf, 0, toRead); + private synchronized void beginCopyAsIs(ObjectToPack otp) + throws StoredObjectRepresentationNotAvailableException { + if (++activeCopyRawData == 1 && activeWindows == 0) { + try { + doOpen(); + } catch (IOException thisPackNotValid) { + StoredObjectRepresentationNotAvailableException gone; + + gone = new StoredObjectRepresentationNotAvailableException(otp); + gone.initCause(thisPackNotValid); + throw gone; + } } } - synchronized void beginCopyRawData() throws IOException { - if (++activeCopyRawData == 1 && activeWindows == 0) - doOpen(); - } - - synchronized void endCopyRawData() { + private synchronized void endCopyAsIs() { if (--activeCopyRawData == 0 && activeWindows == 0) doClose(); } @@ -523,4 +651,29 
@@ private synchronized PackReverseIndex getReverseIdx() throws IOException { reverseIdx = new PackReverseIndex(idx()); return reverseIdx; } + + private boolean isCorrupt(long offset) { + LongList list = corruptObjects; + if (list == null) + return false; + synchronized (list) { + return list.contains(offset); + } + } + + private void setCorrupt(long offset) { + LongList list = corruptObjects; + if (list == null) { + synchronized (readLock) { + list = corruptObjects; + if (list == null) { + list = new LongList(); + corruptObjects = list; + } + } + } + synchronized (list) { + list.add(offset); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackOutputStream.java index a348f1e54..48ec2b5a1 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackOutputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackOutputStream.java @@ -49,35 +49,44 @@ import java.security.MessageDigest; import java.util.zip.CRC32; +import org.eclipse.jgit.util.NB; + /** Custom output stream to support {@link PackWriter}. 
*/ -final class PackOutputStream extends OutputStream { +public final class PackOutputStream extends OutputStream { private final OutputStream out; + private final boolean ofsDelta; + private final CRC32 crc = new CRC32(); private final MessageDigest md = Constants.newMessageDigest(); private long count; - PackOutputStream(final OutputStream out) { + private byte[] headerBuffer = new byte[32]; + + private byte[] copyBuffer; + + PackOutputStream(final OutputStream out, final boolean ofsDelta) { this.out = out; + this.ofsDelta = ofsDelta; } @Override public void write(final int b) throws IOException { + count++; out.write(b); crc.update(b); md.update((byte) b); - count++; } @Override public void write(final byte[] b, final int off, final int len) throws IOException { + count += len; out.write(b, off, len); crc.update(b, off, len); md.update(b, off, len); - count += len; } @Override @@ -85,6 +94,79 @@ public void flush() throws IOException { out.flush(); } + void writeFileHeader(int version, int objectCount) throws IOException { + System.arraycopy(Constants.PACK_SIGNATURE, 0, headerBuffer, 0, 4); + NB.encodeInt32(headerBuffer, 4, version); + NB.encodeInt32(headerBuffer, 8, objectCount); + write(headerBuffer, 0, 12); + } + + /** + * Commits the object header onto the stream. + *

+ * Once the header has been written, the object representation must be fully + * output, or packing must abort abnormally. + * + * @param otp + * the object to pack. Header information is obtained. + * @param rawLength + * number of bytes of the inflated content. For an object that is + * in whole object format, this is the same as the object size. + * For an object that is in a delta format, this is the size of + * the inflated delta instruction stream. + * @throws IOException + * the underlying stream refused to accept the header. + */ + public void writeHeader(ObjectToPack otp, long rawLength) + throws IOException { + if (otp.isDeltaRepresentation()) { + if (ofsDelta) { + ObjectToPack baseInPack = otp.getDeltaBase(); + if (baseInPack != null && baseInPack.isWritten()) { + final long start = count; + int n = encodeTypeSize(Constants.OBJ_OFS_DELTA, rawLength); + write(headerBuffer, 0, n); + + long offsetDiff = start - baseInPack.getOffset(); + n = headerBuffer.length - 1; + headerBuffer[n] = (byte) (offsetDiff & 0x7F); + while ((offsetDiff >>= 7) > 0) + headerBuffer[--n] = (byte) (0x80 | (--offsetDiff & 0x7F)); + write(headerBuffer, n, headerBuffer.length - n); + return; + } + } + + int n = encodeTypeSize(Constants.OBJ_REF_DELTA, rawLength); + otp.getDeltaBaseId().copyRawTo(headerBuffer, n); + write(headerBuffer, 0, n + Constants.OBJECT_ID_LENGTH); + } else { + int n = encodeTypeSize(otp.getType(), rawLength); + write(headerBuffer, 0, n); + } + } + + private int encodeTypeSize(int type, long rawLength) { + long nextLength = rawLength >>> 4; + headerBuffer[0] = (byte) ((nextLength > 0 ? 0x80 : 0x00) + | (type << 4) | (rawLength & 0x0F)); + rawLength = nextLength; + int n = 1; + while (rawLength > 0) { + nextLength >>>= 7; + headerBuffer[n++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (rawLength & 0x7F)); + rawLength = nextLength; + } + return n; + } + + /** @return a temporary buffer writers can use to copy data with. 
*/ + public byte[] getCopyBuffer() { + if (copyBuffer == null) + copyBuffer = new byte[16 * 1024]; + return copyBuffer; + } + /** @return total number of bytes written since stream start. */ long length() { return count; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackWriter.java index 462b12f39..80d8fff53 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackWriter.java @@ -58,13 +58,14 @@ import java.util.zip.Deflater; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevFlag; import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevSort; -import org.eclipse.jgit.util.NB; /** *

@@ -155,13 +156,13 @@ public class PackWriter { private static final int PACK_VERSION_GENERATED = 2; @SuppressWarnings("unchecked") - private final List objectsLists[] = new List[Constants.OBJ_TAG + 1]; + private final List objectsLists[] = new List[Constants.OBJ_TAG + 1]; { - objectsLists[0] = Collections. emptyList(); - objectsLists[Constants.OBJ_COMMIT] = new ArrayList(); - objectsLists[Constants.OBJ_TREE] = new ArrayList(); - objectsLists[Constants.OBJ_BLOB] = new ArrayList(); - objectsLists[Constants.OBJ_TAG] = new ArrayList(); + objectsLists[0] = Collections. emptyList(); + objectsLists[Constants.OBJ_COMMIT] = new ArrayList(); + objectsLists[Constants.OBJ_TREE] = new ArrayList(); + objectsLists[Constants.OBJ_BLOB] = new ArrayList(); + objectsLists[Constants.OBJ_TAG] = new ArrayList(); } private final ObjectIdSubclassMap objectsMap = new ObjectIdSubclassMap(); @@ -179,9 +180,10 @@ public class PackWriter { private ProgressMonitor writeMonitor; - private final byte[] buf = new byte[16384]; // 16 KB + private final ObjectReader reader; - private final WindowCursor windowCursor; + /** {@link #reader} recast to the reuse interface, if it supports it. */ + private final ObjectReuseAsIs reuseSupport; private List sortedByName; @@ -238,8 +240,12 @@ public PackWriter(final Repository repo, final ProgressMonitor monitor) { public PackWriter(final Repository repo, final ProgressMonitor imonitor, final ProgressMonitor wmonitor) { this.db = repo; - windowCursor = new WindowCursor((ObjectDirectory) repo - .getObjectDatabase()); + + reader = db.newObjectReader(); + if (reader instanceof ObjectReuseAsIs) + reuseSupport = ((ObjectReuseAsIs) reader); + else + reuseSupport = null; initMonitor = imonitor == null ? NullProgressMonitor.INSTANCE : imonitor; writeMonitor = wmonitor == null ? NullProgressMonitor.INSTANCE : wmonitor; @@ -525,6 +531,7 @@ public boolean willInclude(final AnyObjectId id) { * @return ObjectId representing SHA-1 name of a pack that was created. 
*/ public ObjectId computeName() { + final byte[] buf = new byte[Constants.OBJECT_ID_LENGTH]; final MessageDigest md = Constants.newMessageDigest(); for (ObjectToPack otp : sortByName()) { otp.copyRawTo(buf, 0); @@ -560,8 +567,8 @@ public void writeIndex(final OutputStream indexStream) throws IOException { private List sortByName() { if (sortedByName == null) { sortedByName = new ArrayList(objectsMap.size()); - for (List list : objectsLists) { - for (LocalObjectToPack otp : list) + for (List list : objectsLists) { + for (ObjectToPack otp : list) sortedByName.add(otp); } Collections.sort(sortedByName); @@ -592,44 +599,38 @@ private List sortByName() { * stream. */ public void writePack(OutputStream packStream) throws IOException { - if (reuseDeltas || reuseObjects) + if ((reuseDeltas || reuseObjects) && reuseSupport != null) searchForReuse(); - out = new PackOutputStream(packStream); + out = new PackOutputStream(packStream, isDeltaBaseAsOffset()); writeMonitor.beginTask(WRITING_OBJECTS_PROGRESS, getObjectsNumber()); - writeHeader(); + out.writeFileHeader(PACK_VERSION_GENERATED, getObjectsNumber()); writeObjects(); writeChecksum(); - windowCursor.release(); + out = null; + reader.release(); writeMonitor.endTask(); } private void searchForReuse() throws IOException { initMonitor.beginTask(SEARCHING_REUSE_PROGRESS, getObjectsNumber()); - for (List list : objectsLists) { + for (List list : objectsLists) { for (ObjectToPack otp : list) { if (initMonitor.isCancelled()) throw new IOException( JGitText.get().packingCancelledDuringObjectsWriting); - windowCursor.selectObjectRepresentation(this, otp); + reuseSupport.selectObjectRepresentation(this, otp); initMonitor.update(1); } } initMonitor.endTask(); } - private void writeHeader() throws IOException { - System.arraycopy(Constants.PACK_SIGNATURE, 0, buf, 0, 4); - NB.encodeInt32(buf, 4, PACK_VERSION_GENERATED); - NB.encodeInt32(buf, 8, getObjectsNumber()); - out.write(buf, 0, 12); - } - private void writeObjects() throws 
IOException { - for (List list : objectsLists) { - for (LocalObjectToPack otp : list) { + for (List list : objectsLists) { + for (ObjectToPack otp : list) { if (writeMonitor.isCancelled()) throw new IOException( JGitText.get().packingCancelledDuringObjectsWriting); @@ -639,74 +640,88 @@ private void writeObjects() throws IOException { } } - private void writeObject(final LocalObjectToPack otp) throws IOException { - otp.markWantWrite(); - if (otp.isDeltaRepresentation()) { - LocalObjectToPack deltaBase = (LocalObjectToPack)otp.getDeltaBase(); - assert deltaBase != null || thin; - if (deltaBase != null && !deltaBase.isWritten()) { - if (deltaBase.wantWrite()) { - otp.clearDeltaBase(); // cycle detected - otp.clearReuseAsIs(); - } else { - writeObject(deltaBase); - } - } - } + private void writeObject(final ObjectToPack otp) throws IOException { + if (otp.isWritten()) + return; // We shouldn't be here. - assert !otp.isWritten(); + otp.markWantWrite(); + if (otp.isDeltaRepresentation()) + writeBaseFirst(otp); out.resetCRC32(); otp.setOffset(out.length()); - final PackedObjectLoader reuse = open(otp); - if (reuse != null) { + while (otp.isReuseAsIs()) { try { - if (otp.isDeltaRepresentation()) - writeDeltaObjectHeader(otp, reuse); - else - writeObjectHeader(otp.getType(), reuse.getSize()); - reuse.copyRawData(out, buf, windowCursor); - } finally { - reuse.endCopyRawData(); + reuseSupport.copyObjectAsIs(out, otp); + otp.setCRC(out.getCRC32()); + writeMonitor.update(1); + return; + } catch (StoredObjectRepresentationNotAvailableException gone) { + if (otp.getOffset() == out.length()) { + redoSearchForReuse(otp); + continue; + } else { + // Object writing already started, we cannot recover. 
+ // + CorruptObjectException coe; + coe = new CorruptObjectException(otp, ""); + coe.initCause(gone); + throw coe; + } } - } else if (otp.isDeltaRepresentation()) { - throw new IOException(JGitText.get().creatingDeltasIsNotImplemented); - } else { - writeWholeObjectDeflate(otp); } - otp.setCRC(out.getCRC32()); + // If we reached here, reuse wasn't possible. + // + writeWholeObjectDeflate(otp); + otp.setCRC(out.getCRC32()); writeMonitor.update(1); } - private PackedObjectLoader open(final LocalObjectToPack otp) throws IOException { - while (otp.isReuseAsIs()) { - try { - PackedObjectLoader reuse = otp.getCopyLoader(windowCursor); - reuse.beginCopyRawData(); - return reuse; - } catch (IOException err) { - // The pack we found the object in originally is gone, or - // it has been overwritten with a different layout. - // - otp.clearDeltaBase(); - otp.clearReuseAsIs(); - windowCursor.selectObjectRepresentation(this, otp); - continue; + private void writeBaseFirst(final ObjectToPack otp) throws IOException { + ObjectToPack baseInPack = otp.getDeltaBase(); + if (baseInPack != null) { + if (!baseInPack.isWritten()) { + if (baseInPack.wantWrite()) { + // There is a cycle. Our caller is trying to write the + // object we want as a base, and called us. Turn off + // delta reuse so we can find another form. + // + reuseDeltas = false; + redoSearchForReuse(otp); + reuseDeltas = true; + } else { + writeObject(baseInPack); + } } + } else if (!thin) { + // This should never occur, the base isn't in the pack and + // the pack isn't allowed to reference base outside objects. + // Write the object as a whole form, even if that is slow. 
+ // + otp.clearDeltaBase(); + otp.clearReuseAsIs(); } - return null; + } + + private void redoSearchForReuse(final ObjectToPack otp) throws IOException, + MissingObjectException { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + reuseSupport.selectObjectRepresentation(this, otp); } private void writeWholeObjectDeflate(final ObjectToPack otp) throws IOException { - final ObjectLoader loader = db.openObject(windowCursor, otp); + final ObjectLoader loader = db.openObject(reader, otp); final byte[] data = loader.getCachedBytes(); - writeObjectHeader(otp.getType(), data.length); + out.writeHeader(otp, data.length); deflater.reset(); deflater.setInput(data, 0, data.length); deflater.finish(); + + byte[] buf = out.getCopyBuffer(); do { final int n = deflater.deflate(buf, 0, buf.length); if (n > 0) @@ -714,42 +729,6 @@ private void writeWholeObjectDeflate(final ObjectToPack otp) } while (!deflater.finished()); } - private void writeDeltaObjectHeader(final ObjectToPack otp, - final PackedObjectLoader reuse) throws IOException { - if (deltaBaseAsOffset && otp.getDeltaBase() != null) { - writeObjectHeader(Constants.OBJ_OFS_DELTA, reuse.getRawSize()); - - final ObjectToPack deltaBase = otp.getDeltaBase(); - long offsetDiff = otp.getOffset() - deltaBase.getOffset(); - int pos = buf.length - 1; - buf[pos] = (byte) (offsetDiff & 0x7F); - while ((offsetDiff >>= 7) > 0) { - buf[--pos] = (byte) (0x80 | (--offsetDiff & 0x7F)); - } - - out.write(buf, pos, buf.length - pos); - } else { - writeObjectHeader(Constants.OBJ_REF_DELTA, reuse.getRawSize()); - otp.getDeltaBaseId().copyRawTo(buf, 0); - out.write(buf, 0, Constants.OBJECT_ID_LENGTH); - } - } - - private void writeObjectHeader(final int objectType, long dataLength) - throws IOException { - long nextLength = dataLength >>> 4; - int size = 0; - buf[size++] = (byte) ((nextLength > 0 ? 
0x80 : 0x00) - | (objectType << 4) | (dataLength & 0x0F)); - dataLength = nextLength; - while (dataLength > 0) { - nextLength >>>= 7; - buf[size++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (dataLength & 0x7F)); - dataLength = nextLength; - } - out.write(buf, 0, size); - } - private void writeChecksum() throws IOException { packcsum = out.getDigest(); out.write(packcsum); @@ -824,7 +803,11 @@ public void addObject(final RevObject object) return; } - final LocalObjectToPack otp = windowCursor.newObjectToPack(object); + final ObjectToPack otp; + if (reuseSupport != null) + otp = reuseSupport.newObjectToPack(object); + else + otp = new ObjectToPack(object); try { objectsLists[object.getType()].add(otp); } catch (ArrayIndexOutOfBoundsException x) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackedObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackedObjectLoader.java index 026b008f1..47f5e67a7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackedObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackedObjectLoader.java @@ -47,7 +47,6 @@ package org.eclipse.jgit.lib; import java.io.IOException; -import java.io.OutputStream; /** * Base class for a set of object loader classes for packed objects. @@ -115,69 +114,6 @@ public final long getObjectOffset() { return objectOffset; } - /** - * Peg the pack file open to support data copying. - *

- * Applications trying to copy raw pack data should ensure the pack stays - * open and available throughout the entire copy. To do that use: - * - *

-	 * loader.beginCopyRawData();
-	 * try {
-	 * 	loader.copyRawData(out, tmpbuf, curs);
-	 * } finally {
-	 * 	loader.endCopyRawData();
-	 * }
-	 * 
- * - * @throws IOException - * this loader contains stale information and cannot be used. - * The most likely cause is the underlying pack file has been - * deleted, and the object has moved to another pack file. - */ - public void beginCopyRawData() throws IOException { - pack.beginCopyRawData(); - } - - /** - * Copy raw object representation from storage to provided output stream. - *

- * Copied data doesn't include object header. User must provide temporary - * buffer used during copying by underlying I/O layer. - *

- * - * @param out - * output stream when data is copied. No buffering is guaranteed. - * @param buf - * temporary buffer used during copying. Recommended size is at - * least few kB. - * @param curs - * temporary thread storage during data access. - * @throws IOException - * when the object cannot be read. - * @see #beginCopyRawData() - */ - public void copyRawData(OutputStream out, byte buf[], WindowCursor curs) - throws IOException { - pack.copyRawData(this, out, buf, curs); - } - - /** Release resources after {@link #beginCopyRawData()}. */ - public void endCopyRawData() { - pack.endCopyRawData(); - } - - /** - * @return true if this loader is capable of fast raw-data copying basing on - * compressed data checksum; false if raw-data copying needs - * uncompressing and compressing data - * @throws IOException - * the index file format cannot be determined. - */ - public boolean supportsFastCopyRawData() throws IOException { - return pack.supportsFastCopyRawData(); - } - /** * @return id of delta base object for this object representation. null if * object is not stored as delta. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java index e16735492..36095ed5e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java @@ -49,6 +49,7 @@ import java.util.zip.Inflater; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.revwalk.RevObject; /** Active handle to a ByteWindow. 
*/ @@ -90,6 +91,12 @@ public void selectObjectRepresentation(PackWriter packer, ObjectToPack otp) db.selectObjectRepresentation(packer, otp, this); } + public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp) + throws IOException, StoredObjectRepresentationNotAvailableException { + LocalObjectToPack src = (LocalObjectToPack) otp; + src.copyFromPack.copyAsIs(out, src, this); + } + /** * Copy bytes from the window to a caller supplied buffer. * @@ -159,16 +166,9 @@ int inflate(final PackFile pack, long position, final byte[] dstbuf, } } - void inflateVerify(final PackFile pack, long position) throws IOException, - DataFormatException { + Inflater inflater() { prepareInflater(); - for (;;) { - pin(pack, position); - window.inflateVerify(position, inf); - if (inf.finished()) - return; - position = window.end; - } + return inf; } private void prepareInflater() { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java index 26608bb2a..96b311dfb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java @@ -83,6 +83,20 @@ public long get(final int i) { return entries[i]; } + /** + * Determine if an entry appears in this collection. + * + * @param value + * the value to search for. + * @return true of {@code value} appears in this list. + */ + public boolean contains(final long value) { + for (int i = 0; i < count; i++) + if (entries[i] == value) + return true; + return false; + } + /** Empty this list */ public void clear() { count = 0;